{"id":"https://openalex.org/W2101993158","doi":"https://doi.org/10.1109/iscslp.2010.5684485","title":"Combining HMM spectrum models and ANN prosody models for speech synthesis of syllable prominent languages","display_name":"Combining HMM spectrum models and ANN prosody models for speech synthesis of syllable prominent languages","publication_year":2010,"publication_date":"2010-11-01","ids":{"openalex":"https://openalex.org/W2101993158","doi":"https://doi.org/10.1109/iscslp.2010.5684485","mag":"2101993158"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2010.5684485","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2010.5684485","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 7th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112041866","display_name":"Hung\u2010Yan Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Hung-Yan Gu","raw_affiliation_strings":["Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan","Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]},{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#","institution_ids":["https://openalex.org/I154864474"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071860744","display_name":"Ming-Yen Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ming-Yen Lai","raw_affiliation_strings":["Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan","Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]},{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#","institution_ids":["https://openalex.org/I154864474"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029285507","display_name":"Sung-Feng Tsai","orcid":null},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Sung-Feng Tsai","raw_affiliation_strings":["Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan","Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]},{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#","institution_ids":["https://openalex.org/I154864474"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112041866"],"corresponding_institution_ids":["https://openalex.org/I154864474"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.13185879,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"22","issue":null,"first_page":"451","last_page":"454"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.8419187068939209},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7478456497192383},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.7239975333213806},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.69756019115448},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.6484126448631287},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6088439226150513},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.5435171127319336},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.5039014220237732},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5000519752502441},{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.4779089391231537},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4741213619709015},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.4191461205482483},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33377718925476074},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11125513911247253},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.08974406123161316}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.8419187068939209},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7478456497192383},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.7239975333213806},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.69756019115448},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.6484126448631287},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6088439226150513},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.5435171127319336},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.5039014220237732},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5000519752502441},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.4779089391231537},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4741213619709015},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.4191461205482483},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33377718925476074},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11125513911247253},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.08974406123161316},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2010.5684485","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2010.5684485","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 7th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6499999761581421,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321040","display_name":"National Science Council","ror":"https://ror.org/02kv4zf79"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W95551363","https://openalex.org/W1513877108","https://openalex.org/W1560013842","https://openalex.org/W1970763740","https://openalex.org/W1973766695","https://openalex.org/W2119174565","https://openalex.org/W2119854751","https://openalex.org/W2137890270","https://openalex.org/W2154920538","https://openalex.org/W2159420560","https://openalex.org/W2473887643","https://openalex.org/W4285719527","https://openalex.org/W6630780049","https://openalex.org/W6677898495","https://openalex.org/W6677921629","https://openalex.org/W6680221371","https://openalex.org/W6720646519"],"related_works":["https://openalex.org/W2396586136","https://openalex.org/W2222951281","https://openalex.org/W2272290179","https://openalex.org/W156219719","https://openalex.org/W2068412075","https://openalex.org/W1505084104","https://openalex.org/W27484908","https://openalex.org/W2599935372","https://openalex.org/W10581632","https://openalex.org/W1927421023"],"abstract_inverted_index":{"In":[0,52,102],"this":[1,33,38,122],"paper,":[2],"an":[3,100,112,144,159,189],"approach":[4,39],"that":[5,37,204],"combines":[6],"HMM":[7,113,135,145,233],"spectrum":[8],"models":[9,13],"and":[10,50,69,130,179],"ANN":[11,129,186,221],"prosody":[12],"is":[14,26,117,172],"proposed":[15],"to":[16,28,87,98,121,139,162,187],"construct":[17],"a":[18,23,77,83,110,127],"speech":[19,170,213,227],"synthesis":[20,104],"system.":[21],"Currently,":[22],"Mandarin":[24],"corpus":[25,68],"used":[27,42,70,97,138],"show":[29,203],"the":[30,53,66,103,115,134,152,176,180,215,219,229],"feasibility":[31],"of":[32,65,76,109,114,133,154,200,232],"approach.":[34],"We":[35],"hope":[36],"can":[40,222],"be":[41,148],"in":[43],"other":[44],"syllable":[45,78,94,108,116],"prominent":[46],"languages":[47],"like":[48],"Min-Nan":[49],"Hakka.":[51],"training":[54,67],"phase,":[55,105],"DCC":[56,164,177],"(discrete":[57],"cepstrum":[58],"coefficients)":[59],"are":[60,79,96,137],"computed":[61],"for":[62,106,166],"each":[63,92,107,167],"frame":[64],"as":[71],"spectral":[72],"parameters.":[73],"Multiple":[74],"utterances":[75,95],"first":[80,119],"grouped":[81],"into":[82],"few":[84],"clusters":[85],"according":[86,120],"their":[88],"DTW":[89],"paths.":[90],"Then,":[91,126],"cluster's":[93],"train":[99],"HMM.":[101],"sentence,":[111],"selected":[118],"syllable's":[123],"contextual":[124],"data.":[125],"duration":[128,131,220,230],"means":[132,231],"states":[136],"determine":[140],"how":[141],"many":[142],"frames":[143],"state":[146],"should":[147],"assigned.":[149],"To":[150],"achieve":[151],"goal":[153],"real-time":[155],"synthesis,":[156],"we":[157],"propose":[158],"interpolation":[160,206],"method":[161,207],"generate":[163],"coefficients":[165,178],"frame.":[168],"Next,":[169],"signal":[171,196],"synthesized":[173],"by":[174,184],"using":[175],"pitch":[181],"contour":[182],"generated":[183],"another":[185],"control":[188],"HNM":[190],"(harmonic":[191],"plus":[192],"noised":[193],"model)":[194],"based":[195],"synthesizer.":[197],"The":[198],"results":[199],"perception":[201],"tests":[202],"our":[205],"obtains":[208],"slightly":[209],"more":[210,224],"natural":[211,225],"synthetic":[212,226],"than":[214,228],"MLE":[216],"method.":[217],"Also,":[218],"have":[223],"states.":[234]},"counts_by_year":[{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
