{"id":"https://openalex.org/W3011386605","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023311","title":"DNN-based Statistical Parametric Speech Synthesis Incorporating Non-negative Matrix Factorization","display_name":"DNN-based Statistical Parametric Speech Synthesis Incorporating Non-negative Matrix Factorization","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W3011386605","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023311","mag":"3011386605"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023311","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023311","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113897545","display_name":"Shunsuke Goto","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Shunsuke Goto","raw_affiliation_strings":["Graduate School of Engineering, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010841595","display_name":"Daisuke Saito","orcid":"https://orcid.org/0000-0003-3200-579X"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Daisuke Saito","raw_affiliation_strings":["Graduate School of Engineering, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041213266","display_name":"Nobuaki Minematsu","orcid":"https://orcid.org/0000-0002-8778-9555"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nobuaki Minematsu","raw_affiliation_strings":["Graduate School of Engineering, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5113897545"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.1443,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61913922,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"5","issue":null,"first_page":"148","last_page":"153"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.8652676343917847},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8234455585479736},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.648755669593811},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6237522959709167},{"id":"https://openalex.org/keywords/spectral-envelope","display_name":"Spectral envelope","score":0.6124776601791382},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.6100444793701172},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.5707993507385254},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5379275679588318},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.42040789127349854},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33675992488861084},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3354286551475525},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30694806575775146},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2650618553161621},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13478729128837585},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09938427805900574}],"concepts":[{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.8652676343917847},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8234455585479736},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.648755669593811},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6237522959709167},{"id":"https://openalex.org/C54926389","wikidata":"https://www.wikidata.org/wiki/Q7575188","display_name":"Spectral envelope","level":2,"score":0.6124776601791382},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.6100444793701172},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.5707993507385254},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5379275679588318},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.42040789127349854},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33675992488861084},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3354286551475525},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30694806575775146},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2650618553161621},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13478729128837585},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09938427805900574},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023311","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023311","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},{"id":"mag:3044593647","is_oa":false,"landing_page_url":"https://jglobal.jst.go.jp/en/detail?JGLOBAL_ID=202002279934000900","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1576227399","https://openalex.org/W1965255698","https://openalex.org/W2020024436","https://openalex.org/W2102003408","https://openalex.org/W2111284386","https://openalex.org/W2118718620","https://openalex.org/W2120605154","https://openalex.org/W2135029798","https://openalex.org/W2147001670","https://openalex.org/W2158291955","https://openalex.org/W2400654494","https://openalex.org/W2417163358","https://openalex.org/W2471520273","https://openalex.org/W2519091744","https://openalex.org/W2577042574","https://openalex.org/W2749881488","https://openalex.org/W2777302760","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2984862052","https://openalex.org/W6675380101","https://openalex.org/W6677759377","https://openalex.org/W6680012447","https://openalex.org/W6712460684","https://openalex.org/W6732251480"],"related_works":["https://openalex.org/W2551137307","https://openalex.org/W2113526703","https://openalex.org/W4390394189","https://openalex.org/W2037504162","https://openalex.org/W2539013788","https://openalex.org/W2792706544","https://openalex.org/W1568451138","https://openalex.org/W2123043102","https://openalex.org/W2098101267","https://openalex.org/W2577807713"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,36,40,55],"novel":[4],"approach":[5],"of":[6,43,93,151],"DNN-based":[7,56,135],"statistical":[8,21],"parametric":[9,22],"speech":[10,23,199],"synthesis":[11,137],"where":[12,196],"non-negative":[13],"matrix":[14],"factorization":[15],"(NMF)":[16],"is":[17,27,129,140,167,200,202],"effectively":[18],"utilized.":[19],"In":[20,122,142,162],"synthesis,":[24],"Mel-frequency":[25],"cepstrum":[26],"often":[28],"employed":[29,102],"for":[30,103,131],"acoustic":[31,57,104],"features.":[32,105],"However,":[33],"it":[34],"represents":[35],"spectral":[37,70,78,111,132,181],"envelope":[38,45,52,112],"as":[39,154],"linear":[41],"combination":[42],"fixed":[44],"curves":[46],"(sines":[47],"and":[48,50,80,134,169,189],"cosines),":[49],"the":[51,64,86,90,95,110,115,174,205],"predicted":[53,116],"by":[54,114],"model":[58],"loses":[59],"its":[60,108],"fine":[61,91,120],"structure.":[62,121],"On":[63],"other":[65],"hand,":[66],"in":[67,184,204],"NMF,":[68,152],"multiple":[69],"envelopes":[71],"(spectrogram)":[72],"are":[73],"decomposed":[74],"into":[75],"two":[76],"factors;":[77],"bases":[79,88],"their":[81],"activity":[82],"patterns":[83],"(activation).":[84],"Since":[85],"obtained":[87,113],"keep":[89],"structure":[92],"envelopes,":[94],"remaining":[96],"factor,":[97],"i.e.":[98],"activation":[99,117,125],"can":[100,146,177],"be":[101],"Due":[106],"to":[107],"sparseness,":[109],"also":[118],"keeps":[119],"this":[123,144,163],"study,":[124,164],"derived":[126],"from":[127],"NMF":[128,139],"utilized":[130],"representation,":[133],"text-to-speech":[136],"incorporating":[138],"proposed.":[141],"addition,":[143],"framework":[145],"potentially":[147],"incorporate":[148],"some":[149],"applications":[150],"such":[153],"bandwidth":[155,165,194],"expansion,":[156,195],"voice":[157],"conversion,":[158],"or":[159],"noise":[160],"reduction.":[161],"expansion":[166],"achieved,":[168],"experimental":[170],"results":[171],"demonstrate":[172],"that":[173,190],"proposed":[175,206],"method":[176],"generate":[178],"more":[179],"natural":[180,197],"parameters":[182],"especially":[183],"48":[185],"kHz":[186,193],"sampling":[187],"rate,":[188],"16":[191],"kHz-to-48":[192],"synthetic":[198],"produced,":[201],"achieved":[203],"framework.":[207]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
