{"id":"https://openalex.org/W2127684947","doi":"https://doi.org/10.1109/icassp.2002.5743744","title":"Towards a multilingual prosody model for text-to-speech","display_name":"Towards a multilingual prosody model for text-to-speech","publication_year":2002,"publication_date":"2002-05-01","ids":{"openalex":"https://openalex.org/W2127684947","doi":"https://doi.org/10.1109/icassp.2002.5743744","mag":"2127684947"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2002.5743744","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2002.5743744","pdf_url":null,"source":{"id":"https://openalex.org/S4363607879","display_name":"IEEE International Conference on Acoustics Speech and Signal Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE International Conference on Acoustics Speech and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043545908","display_name":"Oliver Jokisch","orcid":"https://orcid.org/0000-0001-7411-4420"},"institutions":[{"id":"https://openalex.org/I78650965","display_name":"TU Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Oliver Jokisch","raw_affiliation_strings":["Laboratory of Acoustics and Speech Communication, Dresden University of Technology, Dresden, Germany","Dresden University of Technology, Laboratory of Acoustics and Speech Communication, 01062, Germany"],"affiliations":[{"raw_affiliation_string":"Laboratory of Acoustics and Speech Communication, Dresden University of Technology, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]},{"raw_affiliation_string":"Dresden University of Technology, Laboratory of Acoustics and Speech Communication, 01062, Germany","institution_ids":["https://openalex.org/I78650965"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040214647","display_name":"Hongwei Ding","orcid":"https://orcid.org/0000-0001-8684-0788"},"institutions":[{"id":"https://openalex.org/I78650965","display_name":"TU Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hongwei Ding","raw_affiliation_strings":["Laboratory of Acoustics and Speech Communication, Dresden University of Technology, Dresden, Germany","Dresden University of Technology, Laboratory of Acoustics and Speech Communication, 01062, Germany"],"affiliations":[{"raw_affiliation_string":"Laboratory of Acoustics and Speech Communication, Dresden University of Technology, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]},{"raw_affiliation_string":"Dresden University of Technology, Laboratory of Acoustics and Speech Communication, 01062, Germany","institution_ids":["https://openalex.org/I78650965"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053691753","display_name":"Hans Kruschke","orcid":null},"institutions":[{"id":"https://openalex.org/I78650965","display_name":"TU Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hans Kruschke","raw_affiliation_strings":["Laboratory of Acoustics and Speech Communication, Dresden University of Technology, Dresden, Germany","Dresden University of Technology, Laboratory of Acoustics and Speech Communication, 01062, Germany"],"affiliations":[{"raw_affiliation_string":"Laboratory of Acoustics and Speech Communication, Dresden University of Technology, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]},{"raw_affiliation_string":"Dresden University of Technology, Laboratory of Acoustics and Speech Communication, 01062, Germany","institution_ids":["https://openalex.org/I78650965"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5043545908"],"corresponding_institution_ids":["https://openalex.org/I78650965"],"apc_list":null,"apc_paid":null,"fwci":1.0652,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.76638817,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"I","last_page":"421"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.8389139175415039},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7435669898986816},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.6712052226066589},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.592420220375061},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.5905421376228333},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5203884840011597},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4888538718223572},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45873820781707764},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4520818591117859},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2400435209274292},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10063543915748596}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.8389139175415039},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7435669898986816},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.6712052226066589},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.592420220375061},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.5905421376228333},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5203884840011597},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4888538718223572},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45873820781707764},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4520818591117859},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2400435209274292},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10063543915748596},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2002.5743744","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2002.5743744","pdf_url":null,"source":{"id":"https://openalex.org/S4363607879","display_name":"IEEE International Conference on Acoustics Speech and Signal Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE International Conference on Acoustics Speech and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7400000095367432,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W38355094","https://openalex.org/W52922326","https://openalex.org/W133393388","https://openalex.org/W1584787447","https://openalex.org/W3215531061","https://openalex.org/W6601558908","https://openalex.org/W6602183247","https://openalex.org/W6634852393","https://openalex.org/W6804632464"],"related_works":["https://openalex.org/W10581632","https://openalex.org/W1927421023","https://openalex.org/W3149582125","https://openalex.org/W157238252","https://openalex.org/W2169632867","https://openalex.org/W2465421051","https://openalex.org/W152045069","https://openalex.org/W652196294","https://openalex.org/W2368700418","https://openalex.org/W1965141925"],"abstract_inverted_index":{"The":[0,48,90,167],"generation":[1],"of":[2,101,134],"prosodic":[3,88,103,156],"parameters":[4,157],"such":[5],"as":[6],"F0":[7,123],"contour,":[8],"duration":[9,126],"and":[10,50,59,73,86,98,110,117,125,139,171,177],"intensity":[11],"still":[12],"remains":[13],"an":[14],"important":[15],"issue":[16],"for":[17,146],"naturally-sounding":[18],"text-to-speech":[19],"(TTS),":[20],"although":[21],"recently":[22],"developed":[23],"TTS":[24,61],"systems":[25],"have":[26,41],"achieved":[27],"a":[28,56,70,179],"considerable":[29],"progress.":[30],"Several":[31],"appropriate":[32,102],"but":[33],"language-specific":[34],"rule-based,":[35],"statistical":[36],"or":[37,81],"data-driven":[38,72,170],"prosody":[39,149,181],"models":[40,53],"been":[42],"successfully":[43],"realized":[44],"in":[45,183],"many":[46],"systems.":[47],"language":[49],"parameter":[51],"dependent":[52],"lead":[54],"to":[55,122],"more":[57],"complex":[58],"inefficient":[60],"system":[62],"design.":[63],"In":[64],"earlier":[65],"works":[66],"the":[67,94,99,131,142,147],"authors":[68],"proposed":[69],"hybrid":[71,169],"rule-based":[74,172],"model,":[75],"which":[76],"can":[77],"adjust":[78],"different":[79,107],"voices":[80],"speaking":[82],"styles":[83],"by":[84],"learning":[85],"predicting":[87],"parameters.":[89],"current":[91],"paper":[92],"discusses":[93],"multilingual":[95,148,180],"model":[96,143,173],"generalization":[97],"design":[100],"databases.":[104],"Exemplary,":[105],"two":[106],"languages:":[108],"German":[109],"Mandarin":[111],"Chinese":[112],"are":[113,128,137],"examined.":[114],"Prediction":[115],"results":[116,133],"perceptual":[118,132],"evaluation":[119],"with":[120],"respect":[121],"contours":[124],"values":[127],"presented.":[129],"Since":[130],"both":[135],"languages":[136],"comparable":[138],"quite":[140],"satisfying,":[141],"is":[144,174],"qualified":[145],"control.":[150],"Resynthesis":[151],"stimuli":[152],"obtained":[153],"from":[154],"modified":[155],"partly":[158],"achieve":[159],"near-to-natural":[160],"mean":[161],"opinion":[162],"scores":[163],"(MOS)":[164],"above":[165],"4.0.":[166],"introduced":[168],"comparatively":[175],"simple":[176],"enables":[178],"control":[182],"TTS.":[184]},"counts_by_year":[{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
