{"id":"https://openalex.org/W4225463329","doi":"https://doi.org/10.21437/interspeech.2022-978","title":"SingAug: Data Augmentation for Singing Voice Synthesis with Cycle-consistent Training Strategy","display_name":"SingAug: Data Augmentation for Singing Voice Synthesis with Cycle-consistent Training Strategy","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4225463329","doi":"https://doi.org/10.21437/interspeech.2022-978"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-978","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-978","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048900402","display_name":"Shuai Guo","orcid":"https://orcid.org/0000-0002-9368-0548"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuai Guo","raw_affiliation_strings":["School of Information, Renmin University of China, P.R.China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, P.R.China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101063912","display_name":"Jiatong Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiatong Shi","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, U.S.A"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, U.S.A","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054269959","display_name":"Tao Qian","orcid":"https://orcid.org/0000-0002-8969-906X"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Qian","raw_affiliation_strings":["School of Information, Renmin University of China, P.R.China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, P.R.China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, U.S.A"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, U.S.A","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102462376","display_name":"Qin Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Jin","raw_affiliation_strings":["School of Information, Renmin University of China, P.R.China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, P.R.China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5048900402"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":0.835,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.73334393,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4272","last_page":"4276"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8319270610809326},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.693983793258667},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6445544362068176},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6225886344909668},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.495260626077652},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2360001802444458},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.11955767869949341}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8319270610809326},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.693983793258667},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6445544362068176},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6225886344909668},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.495260626077652},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2360001802444458},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.11955767869949341},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-978","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-978","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2471520273","https://openalex.org/W2515336442","https://openalex.org/W2765407302","https://openalex.org/W2778460379","https://openalex.org/W2907262790","https://openalex.org/W2921576841","https://openalex.org/W2937242376","https://openalex.org/W2940405045","https://openalex.org/W2946200149","https://openalex.org/W2964243274","https://openalex.org/W2972910332","https://openalex.org/W2973046048","https://openalex.org/W2984106626","https://openalex.org/W2995670387","https://openalex.org/W2997430147","https://openalex.org/W3015437531","https://openalex.org/W3015499232","https://openalex.org/W3015516707","https://openalex.org/W3015645837","https://openalex.org/W3035577668","https://openalex.org/W3081279708","https://openalex.org/W3092028330","https://openalex.org/W3097152652","https://openalex.org/W3097514409","https://openalex.org/W3097797284","https://openalex.org/W3114301328","https://openalex.org/W3133525064","https://openalex.org/W3151955268","https://openalex.org/W3161492781","https://openalex.org/W3161890269","https://openalex.org/W3163839574","https://openalex.org/W3212754542","https://openalex.org/W4221167708","https://openalex.org/W4280572880","https://openalex.org/W4287672314","https://openalex.org/W4296068763","https://openalex.org/W4298017177","https://openalex.org/W4308860114","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390529913","https://openalex.org/W2142368101","https://openalex.org/W2372249404","https://openalex.org/W2367547137","https://openalex.org/W2354994102","https://openalex.org/W2387733758","https://openalex.org/W4394050964","https://openalex.org/W2551249631"],"abstract_inverted_index":{"Deep":[0],"learning":[1],"based":[2,23,65],"singing":[3,14,36,86],"voice":[4],"synthesis":[5],"(SVS)":[6],"systems":[7,26],"have":[8,31],"been":[9],"demonstrated":[10],"to":[11,19,33,52,63],"flexibly":[12],"generate":[13],"with":[15,38],"better":[16],"qualities,":[17],"compared":[18],"conventional":[20],"statistical":[21],"parametric":[22],"methods.However,":[24],"neural":[25],"are":[27],"generally":[28],"datahungry":[29],"and":[30,69,94,107],"difficulty":[32],"reach":[34],"reasonable":[35],"quality":[37],"limited":[39],"public":[40,85],"available":[41],"training":[42,55,80,97],"data.In":[43],"this":[44],"work,":[45],"we":[46,76],"explore":[47],"different":[48],"data":[49],"augmentation":[50,68,92],"methods":[51,93],"boost":[53],"the":[54,74,78,95,102],"of":[56],"SVS":[57,64],"systems,":[58],"including":[59],"several":[60],"strategies":[61],"customized":[62],"on":[66,83,104],"pitch":[67],"mix-up":[70],"augmentation.To":[71],"further":[72],"stabilize":[73],"training,":[75],"introduce":[77],"cycle-consistent":[79],"strategy.Extensive":[81],"experiments":[82],"two":[84],"databases":[87],"demonstrate":[88],"that":[89],"our":[90],"proposed":[91],"stabilizing":[96],"strategy":[98],"can":[99],"significantly":[100],"improve":[101],"performance":[103],"both":[105],"objective":[106],"subjective":[108],"evaluations.":[109]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
