{"id":"https://openalex.org/W4391021559","doi":"https://doi.org/10.1109/asru57964.2023.10389702","title":"Crosssinger: A Cross-Lingual Multi-Singer High-Fidelity Singing Voice Synthesizer Trained on Monolingual Singers","display_name":"Crosssinger: A Cross-Lingual Multi-Singer High-Fidelity Singing Voice Synthesizer Trained on Monolingual Singers","publication_year":2023,"publication_date":"2023-12-16","ids":{"openalex":"https://openalex.org/W4391021559","doi":"https://doi.org/10.1109/asru57964.2023.10389702"},"language":"en","primary_location":{"id":"doi:10.1109/asru57964.2023.10389702","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102768876","display_name":"Xintong Wang","orcid":"https://orcid.org/0000-0002-3702-2380"},"institutions":[{"id":"https://openalex.org/I4210100976","display_name":"BOE Technology Group (China)","ror":"https://ror.org/01cwwvj38","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210100976"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xintong Wang","raw_affiliation_strings":["Beijing Bombax XiaoIce Technology Co., Ltd,China","Beijing Bombax XiaoIce Technology Co., Ltd, China"],"affiliations":[{"raw_affiliation_string":"Beijing Bombax XiaoIce Technology Co., Ltd,China","institution_ids":["https://openalex.org/I4210100976"]},{"raw_affiliation_string":"Beijing Bombax XiaoIce Technology Co., Ltd, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100654128","display_name":"Chang Zeng","orcid":"https://orcid.org/0000-0002-4882-1823"},"institutions":[{"id":"https://openalex.org/I4210110163","display_name":"Nippon Soken (Japan)","ror":"https://ror.org/01yk36x23","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210110163"]},{"id":"https://openalex.org/I200475212","display_name":"The Graduate University for Advanced Studies, SOKENDAI","ror":"https://ror.org/0516ah480","country_code":"JP","type":"education","lineage":["https://openalex.org/I200475212"]},{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Chang Zeng","raw_affiliation_strings":["National Institute of Informatics,Japan","SOKENDAI, Japan","National Institute of Informatics, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics,Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"SOKENDAI, Japan","institution_ids":["https://openalex.org/I4210110163","https://openalex.org/I200475212"]},{"raw_affiliation_string":"National Institute of Informatics, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022275946","display_name":"Jun Chen","orcid":"https://orcid.org/0000-0002-1966-0063"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Chen","raw_affiliation_strings":["Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100376538","display_name":"Chunhui Wang","orcid":"https://orcid.org/0000-0002-7151-483X"},"institutions":[{"id":"https://openalex.org/I4210100976","display_name":"BOE Technology Group (China)","ror":"https://ror.org/01cwwvj38","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210100976"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunhui Wang","raw_affiliation_strings":["Beijing Bombax XiaoIce Technology Co., Ltd,China","Beijing Bombax XiaoIce Technology Co., Ltd, China"],"affiliations":[{"raw_affiliation_string":"Beijing Bombax XiaoIce Technology Co., Ltd,China","institution_ids":["https://openalex.org/I4210100976"]},{"raw_affiliation_string":"Beijing Bombax XiaoIce Technology Co., Ltd, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102768876"],"corresponding_institution_ids":["https://openalex.org/I4210100976"],"apc_list":null,"apc_paid":null,"fwci":0.6993,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.7696387,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8783770799636841},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6736880540847778},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5855108499526978},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.534981906414032},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.5290101170539856},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.19739991426467896},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08959266543388367},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.04035267233848572}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8783770799636841},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6736880540847778},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5855108499526978},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.534981906414032},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.5290101170539856},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.19739991426467896},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08959266543388367},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.04035267233848572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru57964.2023.10389702","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1731081199","https://openalex.org/W2067709094","https://openalex.org/W2339754110","https://openalex.org/W2494654097","https://openalex.org/W2593414223","https://openalex.org/W2603947633","https://openalex.org/W2666408839","https://openalex.org/W2747329762","https://openalex.org/W2887511658","https://openalex.org/W2940405045","https://openalex.org/W2946200149","https://openalex.org/W2963073614","https://openalex.org/W2963470893","https://openalex.org/W2964002616","https://openalex.org/W2964243274","https://openalex.org/W2970006822","https://openalex.org/W2972473628","https://openalex.org/W2973046048","https://openalex.org/W2973084242","https://openalex.org/W2990440871","https://openalex.org/W3015212790","https://openalex.org/W3015499232","https://openalex.org/W3027127570","https://openalex.org/W3082910224","https://openalex.org/W3092028330","https://openalex.org/W3096831136","https://openalex.org/W3097514409","https://openalex.org/W3159302906","https://openalex.org/W3206191467","https://openalex.org/W4307312879","https://openalex.org/W4385245566","https://openalex.org/W4385823074","https://openalex.org/W6631190155","https://openalex.org/W6637618735","https://openalex.org/W6739901393","https://openalex.org/W6763832098","https://openalex.org/W6767111847","https://openalex.org/W6777817972","https://openalex.org/W6778823374","https://openalex.org/W6783382068","https://openalex.org/W6783867762","https://openalex.org/W6790220310","https://openalex.org/W6846227501"],"related_works":["https://openalex.org/W4313443006","https://openalex.org/W2945374968","https://openalex.org/W4385452045","https://openalex.org/W4293777179","https://openalex.org/W2164070813","https://openalex.org/W2135608140","https://openalex.org/W2895525995","https://openalex.org/W4224231624","https://openalex.org/W2332512904","https://openalex.org/W2319626700"],"abstract_inverted_index":{"It":[0],"is":[1,31,84,102,110],"challenging":[2],"to":[3,46,63,86],"build":[4],"a":[5,32,113],"multi-singer":[6],"high-fidelity":[7,138],"singing":[8,34,117],"voice":[9,35,118],"synthesis":[10],"system":[11],"with":[12,105,143],"cross-lingual":[13,33,144],"ability":[14],"by":[15],"only":[16],"using":[17],"monolingual":[18],"singers":[19,75,95,142],"in":[20,91],"the":[21,48,54,65,69,106],"training":[22,55],"stage.":[23],"In":[24],"this":[25],"paper,":[26],"we":[27,41,58],"propose":[28],"CrossSinger,":[29],"which":[30,98],"synthesizer":[36],"based":[37],"on":[38,112],"Xiaoicesing2.":[39],"Specifically,":[40],"utilize":[42],"International":[43],"Phonetic":[44],"Alphabet":[45],"unify":[47],"representation":[49],"for":[50,71,140],"all":[51,94],"languages":[52],"of":[53,115],"data.":[56],"Moreover,":[57],"leverage":[59],"conditional":[60],"layer":[61,82],"normalization":[62],"incorporate":[64],"language":[66],"information":[67],"into":[68],"model":[70],"better":[72],"pronunciation":[73],"when":[74],"meet":[76],"unseen":[77],"languages.":[78],"Additionally,":[79],"gradient":[80],"reversal":[81],"(GRL)":[83],"utilized":[85],"remove":[87],"singer":[88],"biases":[89],"included":[90],"lyrics":[92],"since":[93],"are":[96],"monolingual,":[97],"indicates":[99],"singer\u2019s":[100],"identity":[101],"implicitly":[103],"associated":[104],"text.":[107],"The":[108,132],"experiment":[109],"conducted":[111],"combination":[114],"three":[116],"datasets":[119],"containing":[120],"Japanese":[121],"Kiritan":[122],"dataset,":[123,126],"English":[124],"NUS-48E":[125],"and":[127],"one":[128],"internal":[129],"Chinese":[130],"dataset.":[131],"result":[133],"shows":[134],"CrossSinger":[135],"can":[136],"synthesize":[137],"songs":[139],"various":[141],"ability,":[145],"including":[146],"code-switch":[147],"cases.":[148]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
