{"id":"https://openalex.org/W3097514409","doi":"https://doi.org/10.21437/interspeech.2020-1410","title":"XiaoiceSing: A High-Quality and Integrated Singing Voice Synthesis System","display_name":"XiaoiceSing: A High-Quality and Integrated Singing Voice Synthesis System","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3097514409","doi":"https://doi.org/10.21437/interspeech.2020-1410","mag":"3097514409"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-1410","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1410","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023904816","display_name":"Peiling Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I97750245","display_name":"Software (Spain)","ror":"https://ror.org/02ethns06","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210087817","https://openalex.org/I97750245"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","ES"],"is_corresponding":true,"raw_author_name":"Peiling Lu","raw_affiliation_strings":["Xiaoice, Microsoft Software Technology Center Asia"],"affiliations":[{"raw_affiliation_string":"Xiaoice, Microsoft Software Technology Center Asia","institution_ids":["https://openalex.org/I4210113369","https://openalex.org/I97750245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100724761","display_name":"Jie Wu","orcid":"https://orcid.org/0000-0002-6511-3327"},"institutions":[{"id":"https://openalex.org/I97750245","display_name":"Software (Spain)","ror":"https://ror.org/02ethns06","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210087817","https://openalex.org/I97750245"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","ES"],"is_corresponding":false,"raw_author_name":"Jie Wu","raw_affiliation_strings":["Xiaoice, Microsoft Software Technology Center Asia"],"affiliations":[{"raw_affiliation_string":"Xiaoice, Microsoft Software Technology Center Asia","institution_ids":["https://openalex.org/I4210113369","https://openalex.org/I97750245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054843960","display_name":"Jian Luan","orcid":"https://orcid.org/0000-0002-2383-226X"},"institutions":[{"id":"https://openalex.org/I97750245","display_name":"Software (Spain)","ror":"https://ror.org/02ethns06","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210087817","https://openalex.org/I97750245"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","ES"],"is_corresponding":false,"raw_author_name":"Jian Luan","raw_affiliation_strings":["Xiaoice, Microsoft Software Technology Center Asia"],"affiliations":[{"raw_affiliation_string":"Xiaoice, Microsoft Software Technology Center Asia","institution_ids":["https://openalex.org/I4210113369","https://openalex.org/I97750245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018286848","display_name":"Xu Tan","orcid":"https://orcid.org/0000-0001-6123-4378"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Tan","raw_affiliation_strings":["Microsoft Research Asia"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048365280","display_name":"Li Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]},{"id":"https://openalex.org/I97750245","display_name":"Software (Spain)","ror":"https://ror.org/02ethns06","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210087817","https://openalex.org/I97750245"]}],"countries":["CN","ES"],"is_corresponding":false,"raw_author_name":"Li Zhou","raw_affiliation_strings":["Xiaoice, Microsoft Software Technology Center Asia"],"affiliations":[{"raw_affiliation_string":"Xiaoice, Microsoft Software Technology Center Asia","institution_ids":["https://openalex.org/I4210113369","https://openalex.org/I97750245"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5023904816"],"corresponding_institution_ids":["https://openalex.org/I4210113369","https://openalex.org/I97750245"],"apc_list":null,"apc_paid":null,"fwci":5.3807,"has_fulltext":false,"cited_by_count":63,"citation_normalized_percentile":{"value":0.96426266,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1306","last_page":"1310"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6881606578826904},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.5980249643325806},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5878152847290039},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44739001989364624},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.13135027885437012}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6881606578826904},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.5980249643325806},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5878152847290039},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44739001989364624},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.13135027885437012},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-1410","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1410","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6499999761581421,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W29794711","https://openalex.org/W59175527","https://openalex.org/W120415783","https://openalex.org/W1525613233","https://openalex.org/W2066598518","https://openalex.org/W2124097505","https://openalex.org/W2471520273","https://openalex.org/W2515336442","https://openalex.org/W2516406502","https://openalex.org/W2594814449","https://openalex.org/W2778460379","https://openalex.org/W2940405045","https://openalex.org/W2946200149","https://openalex.org/W2972910332","https://openalex.org/W2973046048","https://openalex.org/W3015437531","https://openalex.org/W3015499232","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390529913","https://openalex.org/W2142368101","https://openalex.org/W2372249404","https://openalex.org/W2367547137","https://openalex.org/W2354994102","https://openalex.org/W2387733758","https://openalex.org/W2376664795"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"XiaoiceSing,":[3],"a":[4,56,78],"high-quality":[5],"singing":[6],"voice":[7],"synthesis":[8],"system":[9,99],"which":[10,138],"employs":[11],"an":[12],"integrated":[13],"network":[14],"for":[15,89],"spectrum,":[16],"F0":[17,60,124],"and":[18,36,45,114,125,131],"duration":[19,66,72,87,126],"modeling.We":[20],"follow":[21],"the":[22,65,71,75,85,97,122,140],"main":[23],"architecture":[24],"of":[25,68,73,100,143],"FastSpeech":[26],"while":[27],"proposing":[28],"some":[29],"singing-specific":[30],"design:":[31],"1)":[32],"Besides":[33],"phoneme":[34],"ID":[35],"position":[37],"encoding,":[38],"features":[39],"from":[40],"musical":[41,79],"score":[42],"(e.g.note":[43],"pitch":[44],"length)":[46],"are":[47],"also":[48],"added.2)":[49],"To":[50],"attenuate":[51],"off-key":[52],"issues,":[53],"we":[54],"add":[55],"residual":[57],"connection":[58],"in":[59,77],"prediction.3)":[61],"In":[62],"addition":[63],"to":[64,83],"loss":[67,88],"each":[69],"phoneme,":[70],"all":[74],"phonemes":[76],"note":[80],"is":[81],"accumulated":[82],"calculate":[84],"syllable":[86],"rhythm":[90],"enhancement.Experiment":[91],"results":[92],"show":[93],"that":[94],"XiaoiceSing":[95],"outperforms":[96],"baseline":[98,136],"convolutional":[101],"neural":[102],"networks":[103],"by":[104],"1.44":[105],"MOS":[106],"on":[107,111,116],"sound":[108],"quality,":[109],"1.18":[110],"pronunciation":[112],"accuracy":[113],"1.38":[115],"naturalness":[117],"respectively.In":[118],"two":[119],"A/B":[120],"tests,":[121],"proposed":[123],"modeling":[127],"methods":[128],"achieve":[129],"97.3%":[130],"84.3%":[132],"preference":[133],"rate":[134],"over":[135],"respectively,":[137],"demonstrates":[139],"overwhelming":[141],"advantages":[142],"XiaoiceSing.":[144]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":12}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
