{"id":"https://openalex.org/W2922346874","doi":"https://doi.org/10.23919/apsipa.2018.8659502","title":"Sequential Generation of Singing F0 Contours from Musical Note Sequences Based on WaveNet","display_name":"Sequential Generation of Singing F0 Contours from Musical Note Sequences Based on WaveNet","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2922346874","doi":"https://doi.org/10.23919/apsipa.2018.8659502","mag":"2922346874"},"language":"en","primary_location":{"id":"doi:10.23919/apsipa.2018.8659502","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659502","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108387659","display_name":"Yusuke Wada","orcid":null},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yusuke Wada","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025832278","display_name":"Ryo Nishikimi","orcid":"https://orcid.org/0000-0002-3638-6115"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryo Nishikimi","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043140898","display_name":"Eita Nakamura","orcid":"https://orcid.org/0000-0003-4097-6027"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Eita Nakamura","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003325000","display_name":"Katsutoshi Itoyama","orcid":"https://orcid.org/0000-0002-7098-3896"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Katsutoshi Itoyama","raw_affiliation_strings":["School of Engineering, Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067956319","display_name":"Kazuyoshi Yoshii","orcid":"https://orcid.org/0000-0001-8387-8609"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuyoshi Yoshii","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5108387659"],"corresponding_institution_ids":["https://openalex.org/I22299242"],"apc_list":null,"apc_paid":null,"fwci":0.9909,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.77942054,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"983","last_page":"989"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8701843023300171},{"id":"https://openalex.org/keywords/vibrato","display_name":"Vibrato","score":0.7505475282669067},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7447206974029541},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7325262427330017},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.613269567489624},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.5548252463340759},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.5339594483375549},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5126132369041443},{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.47464925050735474},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.45140042901039124},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.2045707106590271},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1783408522605896}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8701843023300171},{"id":"https://openalex.org/C2781100714","wikidata":"https://www.wikidata.org/wiki/Q377435","display_name":"Vibrato","level":3,"score":0.7505475282669067},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7447206974029541},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7325262427330017},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.613269567489624},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.5548252463340759},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.5339594483375549},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5126132369041443},{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.47464925050735474},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.45140042901039124},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.2045707106590271},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1783408522605896},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/apsipa.2018.8659502","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659502","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W29794711","https://openalex.org/W30043770","https://openalex.org/W1519655822","https://openalex.org/W1522301498","https://openalex.org/W1526336542","https://openalex.org/W1550297032","https://openalex.org/W1576227399","https://openalex.org/W1974421892","https://openalex.org/W1979449467","https://openalex.org/W2029337446","https://openalex.org/W2059443479","https://openalex.org/W2072389495","https://openalex.org/W2124097505","https://openalex.org/W2156012518","https://openalex.org/W2166472876","https://openalex.org/W2170667735","https://openalex.org/W2294797155","https://openalex.org/W2403321691","https://openalex.org/W2406252755","https://openalex.org/W2515336442","https://openalex.org/W2516406502","https://openalex.org/W2519091744","https://openalex.org/W2584032004","https://openalex.org/W2606176153","https://openalex.org/W2635912764","https://openalex.org/W2651834199","https://openalex.org/W2676493621","https://openalex.org/W2746474733","https://openalex.org/W2778460379","https://openalex.org/W2885029858","https://openalex.org/W2949382160","https://openalex.org/W2962896155","https://openalex.org/W2963840672","https://openalex.org/W2964121744","https://openalex.org/W2964243274","https://openalex.org/W3177493096","https://openalex.org/W6601203246","https://openalex.org/W6601248455","https://openalex.org/W6631117800","https://openalex.org/W6631190155","https://openalex.org/W6632757949","https://openalex.org/W6678318511","https://openalex.org/W6682952417","https://openalex.org/W6696085341","https://openalex.org/W6696843773","https://openalex.org/W6713544983","https://openalex.org/W6732429163","https://openalex.org/W6736723571","https://openalex.org/W6753762981","https://openalex.org/W6798415246"],"related_works":["https://openalex.org/W2373880408","https://openalex.org/W2378183644","https://openalex.org/W2287414930","https://openalex.org/W405661683","https://openalex.org/W2532856746","https://openalex.org/W238390473","https://openalex.org/W2053269318","https://openalex.org/W2119168266","https://openalex.org/W4230480656","https://openalex.org/W2978145617"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"a":[3,8,13,17,27,111,117,149,153],"method":[4,140],"that":[5,156,188],"can":[6],"generate":[7,73],"continuous":[9],"F0":[10,35,67,75,108,133,162,196],"contour":[11],"of":[12,20,83,128,148,194],"singing":[14,45,107,132],"voice":[15],"from":[16,110],"monophonic":[18],"sequence":[19],"musical":[21,112,150],"notes":[22],"(musical":[23],"score)":[24],"by":[25,44,121],"using":[26,136],"deep":[28],"neural":[29],"autoregressive":[30],"model":[31],"called":[32],"WaveNet.":[33],"Real":[34],"contours":[36,76,109,134],"include":[37],"complicated":[38],"temporal":[39],"and":[40,50,145,177],"frequency":[41],"fluctuations":[42],"caused":[43],"expressions":[46],"such":[47,55,84],"as":[48,56],"vibrato":[49],"portamento.":[51],"Although":[52],"explicit":[53],"models":[54,59],"hidden":[57],"Markov":[58],"(HMMs)":[60],"have":[61],"often":[62],"used":[63,104],"for":[64,94,105,130,160],"representing":[65],"the":[66,79,126,166,172,181,192],"dynamics,":[68],"it":[69],"is":[70,157],"difficult":[71],"to":[72,78],"realistic":[74],"due":[77],"poor":[80],"representation":[81],"capability":[82,127],"models.":[85],"To":[86],"overcome":[87],"this":[88,122],"limitation,":[89],"WaveNet,":[90],"which":[91],"was":[92,102],"invented":[93],"modeling":[95],"raw":[96],"waveforms":[97],"in":[98,116],"an":[99],"unsupervised":[100],"manner,":[101],"recently":[103],"generating":[106,131,161],"score":[113],"with":[114,171],"lyrics":[115],"supervised":[118],"manner.":[119],"Inspired":[120],"attempt,":[123],"we":[124,164],"investigate":[125],"WaveNet":[129,142],"without":[135],"lyric":[137],"information.":[138],"Our":[139],"conditions":[141],"on":[143,180],"pitch":[144],"contextual":[146],"features":[147],"score.":[151],"As":[152],"loss":[154,169],"function":[155],"more":[158],"suitable":[159],"contours,":[163],"adopted":[165],"modified":[167],"cross-entropy":[168],"weighted":[170],"square":[173],"error":[174],"between":[175],"target":[176],"output":[178],"F0s":[179],"log-frequency":[182],"axis.":[183],"The":[184],"experimental":[185],"results":[186],"show":[187],"these":[189],"techniques":[190],"improve":[191],"quality":[193],"generated":[195],"contours.":[197]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
