{"id":"https://openalex.org/W2047841731","doi":"https://doi.org/10.1109/iscslp.2012.6423491","title":"A study of F0 modelling and generation with lyrics and shape characterization for singing voice synthesis","display_name":"A study of F0 modelling and generation with lyrics and shape characterization for singing voice synthesis","publication_year":2012,"publication_date":"2012-12-01","ids":{"openalex":"https://openalex.org/W2047841731","doi":"https://doi.org/10.1109/iscslp.2012.6423491","mag":"2047841731"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2012.6423491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2012.6423491","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 8th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063468997","display_name":"S. W. Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"S. W. Lee","raw_affiliation_strings":["Human Language Technology Department, Institute for Infocomm Research, ASTAR, Singapore","Human Language Technology Department, Institute for Infocomm Research, A*STAR, Singapore 138632"],"affiliations":[{"raw_affiliation_string":"Human Language Technology Department, Institute for Infocomm Research, ASTAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Human Language Technology Department, Institute for Infocomm Research, A*STAR, Singapore 138632","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021872761","display_name":"Minghui Dong","orcid":"https://orcid.org/0000-0001-6543-2929"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Minghui Dong","raw_affiliation_strings":["Human Language Technology Department, Institute for Infocomm Research, ASTAR, Singapore","Human Language Technology Department, Institute for Infocomm Research, A*STAR, Singapore 138632"],"affiliations":[{"raw_affiliation_string":"Human Language Technology Department, Institute for Infocomm Research, ASTAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Human Language Technology Department, Institute for Infocomm Research, A*STAR, Singapore 138632","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Human Language Technology Department, Institute for Infocomm Research, ASTAR, Singapore","Human Language Technology Department, Institute for Infocomm Research, A*STAR, Singapore 138632"],"affiliations":[{"raw_affiliation_string":"Human Language Technology Department, Institute for Infocomm Research, ASTAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Human Language Technology Department, Institute for Infocomm Research, A*STAR, Singapore 138632","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5063468997"],"corresponding_institution_ids":["https://openalex.org/I115228651","https://openalex.org/I3005327000"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.10063338,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"e85 d","issue":null,"first_page":"150","last_page":"154"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.9314377307891846},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.891481876373291},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.8821600675582886},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6854100823402405},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6697092056274414},{"id":"https://openalex.org/keywords/fundamental-frequency","display_name":"Fundamental frequency","score":0.6469101905822754},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6433242559432983},{"id":"https://openalex.org/keywords/frequency-domain","display_name":"Frequency domain","score":0.5245835185050964},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4815512001514435},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4778313636779785},{"id":"https://openalex.org/keywords/voice","display_name":"Voice","score":0.46264445781707764},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.46020716428756714},{"id":"https://openalex.org/keywords/human-voice","display_name":"Human voice","score":0.425301730632782},{"id":"https://openalex.org/keywords/characterization","display_name":"Characterization (materials science)","score":0.4213162660598755},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.33225327730178833},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.09647318720817566},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.07210734486579895}],"concepts":[{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.9314377307891846},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.891481876373291},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.8821600675582886},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6854100823402405},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6697092056274414},{"id":"https://openalex.org/C10513763","wikidata":"https://www.wikidata.org/wiki/Q1331774","display_name":"Fundamental frequency","level":2,"score":0.6469101905822754},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6433242559432983},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.5245835185050964},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4815512001514435},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4778313636779785},{"id":"https://openalex.org/C552089266","wikidata":"https://www.wikidata.org/wiki/Q494510","display_name":"Voice","level":2,"score":0.46264445781707764},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.46020716428756714},{"id":"https://openalex.org/C20766975","wikidata":"https://www.wikidata.org/wiki/Q7390","display_name":"Human voice","level":2,"score":0.425301730632782},{"id":"https://openalex.org/C2780841128","wikidata":"https://www.wikidata.org/wiki/Q5073781","display_name":"Characterization (materials science)","level":2,"score":0.4213162660598755},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.33225327730178833},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.09647318720817566},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.07210734486579895},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2012.6423491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2012.6423491","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 8th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W29794711","https://openalex.org/W49522132","https://openalex.org/W88081813","https://openalex.org/W111300696","https://openalex.org/W113106864","https://openalex.org/W202653786","https://openalex.org/W1512429158","https://openalex.org/W1531696119","https://openalex.org/W1560013842","https://openalex.org/W1599681080","https://openalex.org/W2029434926","https://openalex.org/W2095723991","https://openalex.org/W2106792148","https://openalex.org/W2115144768","https://openalex.org/W2124097505","https://openalex.org/W2131062138","https://openalex.org/W2136507169","https://openalex.org/W2146699317","https://openalex.org/W2150933458","https://openalex.org/W2154920538","https://openalex.org/W2166472876","https://openalex.org/W2488218815","https://openalex.org/W4252486833","https://openalex.org/W4285719527","https://openalex.org/W6601203246","https://openalex.org/W6630838124","https://openalex.org/W6635563254","https://openalex.org/W6678318511"],"related_works":["https://openalex.org/W4391272374","https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W40885451","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W2433276473","https://openalex.org/W2535215250","https://openalex.org/W1537411440","https://openalex.org/W2024201202"],"abstract_inverted_index":{"Natural":[0],"pitch":[1],"fluctuation":[2,22],"is":[3,76,95,138,142],"essential":[4],"to":[5,35,78,98,100,124],"singing":[6,38,74],"voice.":[7],"Recently,":[8],"we":[9,42,61],"have":[10],"proposed":[11],"a":[12],"generalized":[13,91],"F0":[14,21,32,52,75,81,92,110,126],"modelling":[15,93],"method":[16,94],"which":[17,148],"models":[18],"the":[19,66,80,118,135],"expected":[20],"under":[23],"various":[24],"contexts":[25],"with":[26,45],"note":[27],"HMMs.":[28],"Knowing":[29],"that":[30,117],"having":[31],"contours":[33],"close":[34],"human":[36],"professional":[37],"promotes":[39],"perceived":[40],"quality,":[41],"are":[43],"confronted":[44],"two":[46,63],"requirements:":[47],"(1)":[48],"accurate":[49],"estimation":[50,111],"on":[51,73],"and":[53,82,128],"(2)":[54],"precise":[55],"voiced/unvoiced":[56],"decisions.":[57],"In":[58],"this":[59],"paper,":[60],"introduce":[62],"techniques":[64],"in":[65,105],"above":[67],"directions.":[68],"Influence":[69],"of":[70,107,120,131],"lyrics":[71,121],"phonetics":[72],"considered":[77],"capture":[79],"voicing":[83],"behaviour":[84],"brought":[85],"from":[86],"different":[87],"note-lyrics":[88],"combinations.":[89],"The":[90],"further":[96,150],"extended":[97],"frequency-domain":[99,136],"study":[101],"if":[102],"shape":[103],"characterization":[104],"terms":[106],"sinusoids":[108],"helps":[109],"or":[112],"not.":[113],"Our":[114],"experiments":[115],"showed":[116],"use":[119],"information":[122],"leads":[123],"better":[125],"generation":[127],"improves":[129],"naturalness":[130],"synthesized":[132],"singing.":[133],"While":[134],"representation":[137],"viable,":[139],"its":[140],"performance":[141],"less":[143],"competitive":[144],"than":[145],"time-domain":[146],"representation,":[147],"requires":[149],"study.":[151]},"counts_by_year":[{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
