{"id":"https://openalex.org/W3193323418","doi":"https://doi.org/10.21437/ssw.2021-21","title":"Rapping-Singing Voice Synthesis based on Phoneme-level Prosody Control","display_name":"Rapping-Singing Voice Synthesis based on Phoneme-level Prosody Control","publication_year":2021,"publication_date":"2021-08-24","ids":{"openalex":"https://openalex.org/W3193323418","doi":"https://doi.org/10.21437/ssw.2021-21","mag":"3193323418"},"language":"en","primary_location":{"id":"doi:10.21437/ssw.2021-21","is_oa":false,"landing_page_url":"https://doi.org/10.21437/ssw.2021-21","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"11th ISCA Speech Synthesis Workshop (SSW 11)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2111.09146","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050072052","display_name":"\u039a\u03c9\u03bd\u03c3\u03c4\u03b1\u03bd\u03c4\u03af\u03bd\u03bf\u03c2 \u039c\u03b1\u03c1\u03ba\u03cc\u03c0\u03bf\u03c5\u03bb\u03bf\u03c2","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Konstantinos Markopoulos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052196585","display_name":"Nikolaos Ellinas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nikolaos Ellinas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015840293","display_name":"Alexandra Vioni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexandra Vioni","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061303593","display_name":"Myrsini Christidou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Myrsini Christidou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067728489","display_name":"Panos Kakoulidis","orcid":"https://orcid.org/0000-0003-2980-2528"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panos Kakoulidis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022310876","display_name":"Georgios Vamvoukakis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Georgios Vamvoukakis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110947099","display_name":"June Sig Sung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"June Sig Sung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006509336","display_name":"Hyoung-Min Park","orcid":"https://orcid.org/0000-0001-7397-8286"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyoungmin Park","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027494724","display_name":"Pirros Tsiakoulis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pirros Tsiakoulis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091595884","display_name":"Aimilios Chalamandaris","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aimilios Chalamandaris","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5037667852","display_name":"Georgia Maniati","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Georgia Maniati","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2799,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.63989634,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"118","last_page":"123"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8025040626525879},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7419010400772095},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.741521418094635},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7120249271392822},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6413350105285645},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.5912300944328308},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4416128098964691},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.43092572689056396},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.41004762053489685},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34113118052482605},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1912350356578827},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.11016833782196045},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10921204090118408}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8025040626525879},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7419010400772095},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.741521418094635},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7120249271392822},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6413350105285645},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.5912300944328308},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4416128098964691},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.43092572689056396},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.41004762053489685},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34113118052482605},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1912350356578827},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.11016833782196045},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10921204090118408},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/ssw.2021-21","is_oa":false,"landing_page_url":"https://doi.org/10.21437/ssw.2021-21","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"11th ISCA Speech Synthesis Workshop (SSW 11)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2111.09146","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.09146","pdf_url":"https://arxiv.org/pdf/2111.09146","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2111.09146","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.09146","pdf_url":"https://arxiv.org/pdf/2111.09146","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W29794711","https://openalex.org/W1525613233","https://openalex.org/W1525954971","https://openalex.org/W1749799732","https://openalex.org/W2095705004","https://openalex.org/W2148228080","https://openalex.org/W2150658333","https://openalex.org/W2168510624","https://openalex.org/W2409027918","https://openalex.org/W2516406502","https://openalex.org/W2536210537","https://openalex.org/W2778460379","https://openalex.org/W2946200149","https://openalex.org/W2946639766","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2972473628","https://openalex.org/W2973046048","https://openalex.org/W3015645837","https://openalex.org/W3021164770","https://openalex.org/W3081279708","https://openalex.org/W3082910224","https://openalex.org/W3095389792","https://openalex.org/W3095459301","https://openalex.org/W3095948607","https://openalex.org/W3096437652","https://openalex.org/W3097152652","https://openalex.org/W3133525064","https://openalex.org/W3163003432","https://openalex.org/W3216401400"],"related_works":["https://openalex.org/W169399214","https://openalex.org/W4391272374","https://openalex.org/W1914543332","https://openalex.org/W1984347656","https://openalex.org/W2946856121","https://openalex.org/W40885451","https://openalex.org/W10581632","https://openalex.org/W1927421023","https://openalex.org/W2108985546","https://openalex.org/W2081919107"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"a":[3,18,92,141],"text-to-rapping/singing":[4],"system":[5,76,146,162],"is":[6,55,77,118,147],"introduced,":[7],"which":[8,29,79,163],"can":[9,131,181],"be":[10,132],"adapted":[11],"to":[12,57,139,158,166],"any":[13],"speaker's":[14,60,100],"voice.":[15,70],"It":[16],"utilizes":[17],"Tacotron-based":[19],"multispeaker":[20],"acoustic":[21],"model":[22,54],"trained":[23],"on":[24,44],"read-only":[25,172],"speech":[26],"data":[27],"and":[28,39,87,95],"provides":[30],"prosody":[31,41],"control":[32],"at":[33],"the":[34,67,75,81,84,114,124,178],"phoneme":[35],"level.":[36],"Dataset":[37],"augmentation":[38],"additional":[40,108],"manipulation":[42,112],"based":[43],"traditional":[45],"DSP":[46],"algorithms":[47],"are":[48],"also":[49,119,164],"investigated.":[50],"The":[51,71,128,144],"neural":[52],"TTS":[53],"fine-tuned":[56],"an":[58,91,135,159],"unseen":[59],"limited":[61],"recordings,":[62],"allowing":[63],"rapping/singing":[64,185],"synthesis":[65],"with":[66,134,187],"target's":[68],"speaker":[69],"detailed":[72],"pipeline":[73],"of":[74,83,103,110,113],"described,":[78],"includes":[80],"extraction":[82],"target":[85,99,125],"pitch":[86],"duration":[88,126],"values":[89],"from":[90,171],"capella":[93],"song":[94],"their":[96],"conversion":[97],"into":[98],"valid":[101],"range":[102],"notes":[104],"before":[105],"synthesis.":[106],"An":[107],"stage":[109],"prosodic":[111],"output":[115],"via":[116,149],"WSOLA":[117],"investigated":[120],"for":[121],"better":[122],"matching":[123],"values.":[127],"synthesized":[129],"utterances":[130],"mixed":[133],"instrumental":[136],"accompaniment":[137],"track":[138],"produce":[140,167,182],"complete":[142],"song.":[143],"proposed":[145,179],"evaluated":[148],"subjective":[150],"listening":[151],"tests":[152],"as":[153,155],"well":[154],"in":[156],"comparison":[157],"available":[160],"alternate":[161],"aims":[165],"synthetic":[168],"singing":[169],"voice":[170,186],"training":[173],"data.":[174],"Results":[175],"show":[176],"that":[177],"approach":[180],"high":[183],"quality":[184],"increased":[188],"naturalness.":[189]},"counts_by_year":[{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
