{"id":"https://openalex.org/W3081279708","doi":"https://doi.org/10.1145/3394486.3403249","title":"DeepSinger: Singing Voice Synthesis with Data Mined From the Web","display_name":"DeepSinger: Singing Voice Synthesis with Data Mined From the Web","publication_year":2020,"publication_date":"2020-08-20","ids":{"openalex":"https://openalex.org/W3081279708","doi":"https://doi.org/10.1145/3394486.3403249","mag":"3081279708"},"language":"en","primary_location":{"id":"doi:10.1145/3394486.3403249","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394486.3403249","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088179161","display_name":"Yi Ren","orcid":"https://orcid.org/0000-0001-9889-5460"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yi Ren","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101522530","display_name":"Xu Tan","orcid":"https://orcid.org/0000-0001-5631-0639"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Tan","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020025718","display_name":"Tao Qin","orcid":"https://orcid.org/0000-0002-9095-0776"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Qin","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054843960","display_name":"Jian Luan","orcid":"https://orcid.org/0000-0002-2383-226X"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Luan","raw_affiliation_strings":["Microsoft STC Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft STC Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079260216","display_name":"Zhou Zhao","orcid":"https://orcid.org/0000-0001-6121-0384"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Zhao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101884287","display_name":"Tie\u2010Yan Liu","orcid":"https://orcid.org/0000-0002-0476-8020"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tie-Yan Liu","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5088179161"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":6.7604,"has_fulltext":false,"cited_by_count":71,"citation_normalized_percentile":{"value":0.97406921,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1979","last_page":"1989"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.9468931555747986},{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.8527400493621826},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7677515745162964},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6386972665786743},{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.6382277607917786},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3599446713924408},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.15576386451721191},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.13243934512138367}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.9468931555747986},{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.8527400493621826},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7677515745162964},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6386972665786743},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.6382277607917786},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3599446713924408},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.15576386451721191},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.13243934512138367},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3394486.3403249","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394486.3403249","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1904711963","https://openalex.org/W2007815473","https://openalex.org/W2108862644","https://openalex.org/W2120847449","https://openalex.org/W2134387846","https://openalex.org/W2150658333","https://openalex.org/W2284628133","https://openalex.org/W2482558056","https://openalex.org/W2515336442","https://openalex.org/W2736648940","https://openalex.org/W2747874407","https://openalex.org/W2767052532","https://openalex.org/W2778460379","https://openalex.org/W2889244839","https://openalex.org/W2902351815","https://openalex.org/W2963403868","https://openalex.org/W2963975282","https://openalex.org/W2964243274","https://openalex.org/W6664164299"],"related_works":["https://openalex.org/W2360952181","https://openalex.org/W2597614303","https://openalex.org/W634160686","https://openalex.org/W4310670065","https://openalex.org/W3214861561","https://openalex.org/W2389838651","https://openalex.org/W2378183644","https://openalex.org/W2287414930","https://openalex.org/W4288088492","https://openalex.org/W2982290989"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,50],"develop":[4],"DeepSinger,":[5],"a":[6,52,78,85,152,171,179],"multi-lingual":[7,79],"multi-singer":[8,80],"singing":[9,20,38,47,81,148,183,190,204,229,240],"voice":[10,249],"synthesis":[11,168],"(SVS)":[12],"system,":[13],"which":[14],"is":[15,115,155],"built":[16],"from":[17,24,67,92,125,181,213,233],"scratch":[18],"using":[19,97],"training":[21,123],"data":[22,36,44,124,212,230],"mined":[23,203,232],"music":[25,126],"websites.":[26],"The":[27,223],"pipeline":[28],"of":[29,32,61,111,178,208,244],"DeepSinger":[30,99,200,236],"consists":[31,207],"several":[33,101],"steps,":[34],"including":[35],"crawling,":[37],"and":[39,46,75,94,141,157,169,185,195,221,248],"accompaniment":[40],"separation,":[41],"lyrics-to-singing":[42,53,130],"alignment,":[43],"filtration,":[45],"modeling.":[48],"Specifically,":[49],"design":[51,77],"alignment":[54,131,139],"model":[55,82,132,149],"to":[56,71,88,108,174],"automatically":[57],"extract":[58],"the":[59,109,116,129,147,161,176,228,234],"duration":[60],"each":[62],"phoneme":[63,73],"in":[64,166,192,242,256],"lyrics":[65],"starting":[66],"coarse-grained":[68],"sentence":[69],"level":[70],"fine-grained":[72],"level,":[74],"further":[76,133],"based":[83,150],"on":[84,151,201,216],"feed-forward":[86,153],"Transformer":[87,154],"directly":[89,121],"generate":[90],"linear-spectrograms":[91],"lyrics,":[93],"synthesize":[95,189,238],"voices":[96,191,241],"Griffn-Lim.":[98],"has":[100],"advantages":[102],"over":[103],"previous":[104],"SVS":[105,118],"systems:":[106],"1)":[107],"best":[110],"our":[112,202],"knowledge,":[113],"it":[114,187],"first":[117],"system":[119],"that":[120,206,226],"mines":[122],"websites,":[127],"2)":[128],"avoids":[134],"any":[135],"human":[136],"efforts":[137],"for":[138],"labeling":[140,144],"greatly":[142],"reduces":[143],"cost,":[145],"3)":[146],"simple":[156],"efficient,":[158],"by":[159],"removing":[160],"complicated":[162],"acoustic":[163],"feature":[164],"modeling":[165],"parametric":[167],"leveraging":[170],"reference":[172],"encoder":[173],"capture":[175],"timbre":[177],"singer":[180],"noisy":[182],"data,":[184],"4)":[186],"can":[188,237],"multiple":[193,196],"languages":[194,218],"singers.":[197],"We":[198],"evaluate":[199],"dataset":[205],"about":[209],"92":[210],"hours":[211],"89":[214],"singers":[215],"three":[217],"(Chinese,":[219],"Cantonese":[220],"English).":[222],"results":[224],"demonstrate":[225],"with":[227],"purely":[231],"Web,":[235],"high-quality":[239],"terms":[243],"both":[245],"pitch":[246],"accuracy":[247],"naturalness.":[250],"Our":[251],"audio":[252],"samples":[253],"are":[254],"shown":[255],"https://speechresearch.github.io/deepsinger/.":[257]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":17},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":16},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
