{"id":"https://openalex.org/W2592710836","doi":"https://doi.org/10.1109/taslp.2017.2679603","title":"Personalized Spontaneous Speech Synthesis Using a Small-Sized Unsegmented Semispontaneous Speech","display_name":"Personalized Spontaneous Speech Synthesis Using a Small-Sized Unsegmented Semispontaneous Speech","publication_year":2017,"publication_date":"2017-03-08","ids":{"openalex":"https://openalex.org/W2592710836","doi":"https://doi.org/10.1109/taslp.2017.2679603","mag":"2592710836"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2017.2679603","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2679603","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112612990","display_name":"Yi\u2010Chin Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4880106","display_name":"Feng Chia University","ror":"https://ror.org/05vhczg54","country_code":"TW","type":"education","lineage":["https://openalex.org/I4880106"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yi-Chin Huang","raw_affiliation_strings":["Department of Information Engineering and Computer Science, Feng Chia University, Taichung, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information Engineering and Computer Science, Feng Chia University, Taichung, Taiwan","institution_ids":["https://openalex.org/I4880106"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103251327","display_name":"Chung\u2010Hsien Wu","orcid":"https://orcid.org/0000-0002-3947-2123"},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chung-Hsien Wu","raw_affiliation_strings":["Department of Computer Science and Information Engineering, National Cheng Kung University, Tainan, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Cheng Kung University, Tainan, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058278936","display_name":"Yan-You Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yan-You Chen","raw_affiliation_strings":["Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024786590","display_name":"Ming-Ge Shie","orcid":null},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ming-Ge Shie","raw_affiliation_strings":["Department of Computer Science and Information Engineering, National Cheng Kung University, Tainan, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Cheng Kung University, Tainan, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088136847","display_name":"Jhing-Fa Wang","orcid":"https://orcid.org/0009-0000-2816-2480"},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jhing-Fa Wang","raw_affiliation_strings":["Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.01737302,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"25","issue":"5","first_page":"1048","last_page":"1060"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.8330711722373962},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7704615592956543},{"id":"https://openalex.org/keywords/speech-segmentation","display_name":"Speech segmentation","score":0.6435846090316772},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5984693765640259},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.5837429165840149},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.46832209825515747},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.4639347195625305},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4461480379104614},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.438503623008728},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4206288754940033},{"id":"https://openalex.org/keywords/viterbi-algorithm","display_name":"Viterbi algorithm","score":0.4132389426231384},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.38998138904571533},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.2508541941642761}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.8330711722373962},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7704615592956543},{"id":"https://openalex.org/C207030507","wikidata":"https://www.wikidata.org/wiki/Q2266173","display_name":"Speech segmentation","level":3,"score":0.6435846090316772},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5984693765640259},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.5837429165840149},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.46832209825515747},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.4639347195625305},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4461480379104614},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.438503623008728},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4206288754940033},{"id":"https://openalex.org/C60582962","wikidata":"https://www.wikidata.org/wiki/Q83886","display_name":"Viterbi algorithm","level":3,"score":0.4132389426231384},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.38998138904571533},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.2508541941642761},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2017.2679603","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2679603","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4099999964237213,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G2071450271","display_name":null,"funder_award_id":"MOST105-2218-E-006-028","funder_id":"https://openalex.org/F4320309618","funder_display_name":"Ministry of Science and Technology"}],"funders":[{"id":"https://openalex.org/F4320309618","display_name":"Ministry of Science and Technology","ror":"https://ror.org/02b207r52"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W11234093","https://openalex.org/W45736138","https://openalex.org/W54771919","https://openalex.org/W86186305","https://openalex.org/W110305337","https://openalex.org/W157389678","https://openalex.org/W1509905243","https://openalex.org/W1563185989","https://openalex.org/W1572730534","https://openalex.org/W1585280831","https://openalex.org/W1602430027","https://openalex.org/W1789888671","https://openalex.org/W1927421023","https://openalex.org/W1986174057","https://openalex.org/W2000513720","https://openalex.org/W2030044894","https://openalex.org/W2032899051","https://openalex.org/W2038086080","https://openalex.org/W2039241583","https://openalex.org/W2049036695","https://openalex.org/W2049686551","https://openalex.org/W2051741485","https://openalex.org/W2066186361","https://openalex.org/W2093450784","https://openalex.org/W2096270777","https://openalex.org/W2097931979","https://openalex.org/W2104653033","https://openalex.org/W2114980378","https://openalex.org/W2116648050","https://openalex.org/W2120605154","https://openalex.org/W2131033001","https://openalex.org/W2132435454","https://openalex.org/W2139795704","https://openalex.org/W2142384583","https://openalex.org/W2150612204","https://openalex.org/W2150658333","https://openalex.org/W2151311453","https://openalex.org/W2153914468","https://openalex.org/W2154920538","https://openalex.org/W2159937480","https://openalex.org/W2162295204","https://openalex.org/W2165203136","https://openalex.org/W2165436104","https://openalex.org/W2166866311","https://openalex.org/W2293049663","https://openalex.org/W2296704011","https://openalex.org/W2309282333","https://openalex.org/W2365209858","https://openalex.org/W2395956841","https://openalex.org/W2406211585","https://openalex.org/W4251158933","https://openalex.org/W4285719527","https://openalex.org/W6600452588","https://openalex.org/W6601834460","https://openalex.org/W6604471140","https://openalex.org/W6633837372","https://openalex.org/W6634410540","https://openalex.org/W6636404455","https://openalex.org/W6697285287","https://openalex.org/W6712582474","https://openalex.org/W6714372243","https://openalex.org/W7025091378"],"related_works":["https://openalex.org/W1980401417","https://openalex.org/W2164147372","https://openalex.org/W2550171623","https://openalex.org/W3106321628","https://openalex.org/W4253660971","https://openalex.org/W1909292483","https://openalex.org/W2111874347","https://openalex.org/W2125642021","https://openalex.org/W2185667427","https://openalex.org/W1428730622"],"abstract_inverted_index":{"A":[0,61],"systematic":[1],"approach":[2],"is":[3,25,46,67,103,123],"proposed":[4,104,121,141],"to":[5,27,48,69,105,125,155],"synthesizing":[6],"personalized":[7],"spontaneous":[8,92,113],"speech":[9,14,34,78,107,114,153],"using":[10,57],"a":[11,31,41,94],"small-sized":[12],"unsegmented":[13],"corpus":[15,35],"of":[16,36,52,75,86,119,127,133,150],"the":[17,37,49,53,58,72,76,83,87,99,116,120,140,145,151,156],"target":[18,38,54],"speaker.":[19,39],"First,":[20],"an":[21],"automatic":[22],"segmentation":[23,117],"algorithm":[24],"employed":[26],"segment":[28],"and":[29,147],"label":[30],"collected":[32],"semispontaneous":[33],"Then,":[40],"pretrained":[42],"average":[43],"voice":[44,50],"model":[45,51],"adapted":[47],"speaker":[55,73,148,162],"by":[56],"segmented":[59],"data.":[60],"postfilter":[62],"based":[63,161],"on":[64,112],"modulation":[65],"spectrum":[66],"adopted":[68],"further":[70],"improve":[71,106,144],"similarity":[74,149],"synthesized":[77,88,152],"as":[79,81],"well":[80],"alleviate":[82],"over-smoothing":[84],"problem":[85],"speech.":[89],"For":[90,109],"generating":[91],"speech,":[93],"smoothing":[95],"method":[96,122,142],"applied":[97],"at":[98],"prosodic":[100],"word":[101],"level":[102],"fluency.":[108],"objective":[110],"evaluation":[111],"segmentation,":[115],"accuracy":[118],"superior":[124],"that":[126,139],"Viterbi-based":[128],"forced":[129],"alignment.":[130],"The":[131],"results":[132],"subjective":[134],"listening":[135],"test":[136],"also":[137],"show":[138],"can":[143],"spontaneity":[146],"compared":[154],"maximum":[157],"likelihood":[158],"linear":[159],"regression":[160],"adaptation":[163],"method.":[164]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
