{"id":"https://openalex.org/W2612376646","doi":"https://doi.org/10.1109/iscslp.2016.7918450","title":"Discourse prosody and its application to speech synthesis","display_name":"Discourse prosody and its application to speech synthesis","publication_year":2016,"publication_date":"2016-10-01","ids":{"openalex":"https://openalex.org/W2612376646","doi":"https://doi.org/10.1109/iscslp.2016.7918450","mag":"2612376646"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2016.7918450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2016.7918450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100685461","display_name":"Na Hu","orcid":"https://orcid.org/0000-0001-6362-0969"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Na Hu","raw_affiliation_strings":["iFLYTEK Research, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, Hefei, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052805910","display_name":"Pengfei Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pengfei Shao","raw_affiliation_strings":["iFLYTEK Research, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, Hefei, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056170033","display_name":"Yiqing Zu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yiqing Zu","raw_affiliation_strings":["iFLYTEK Research, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, Hefei, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013128260","display_name":"Zuyan Wang","orcid":"https://orcid.org/0000-0002-7338-5912"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuyan Wang","raw_affiliation_strings":["iFLYTEK Research, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, Hefei, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101399361","display_name":"Wei Huang","orcid":"https://orcid.org/0000-0002-4817-8858"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Huang","raw_affiliation_strings":["iFLYTEK Research, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, Hefei, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069365665","display_name":"Shijin Wang","orcid":"https://orcid.org/0000-0002-4788-0530"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shijin Wang","raw_affiliation_strings":["iFLYTEK Research, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, Hefei, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2915,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.83818961,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"12","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.9242933392524719},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.8792383074760437},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6806261539459229},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6514183282852173},{"id":"https://openalex.org/keywords/rhetorical-question","display_name":"Rhetorical question","score":0.6066479682922363},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5449397563934326},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.534834623336792},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.5177758932113647},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5090277194976807},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.48519089818000793},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41442549228668213},{"id":"https://openalex.org/keywords/speech-technology","display_name":"Speech technology","score":0.41090449690818787},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.37135860323905945},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.13072118163108826},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.12233638763427734}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.9242933392524719},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.8792383074760437},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6806261539459229},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6514183282852173},{"id":"https://openalex.org/C192562157","wikidata":"https://www.wikidata.org/wiki/Q316694","display_name":"Rhetorical question","level":2,"score":0.6066479682922363},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5449397563934326},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.534834623336792},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.5177758932113647},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5090277194976807},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48519089818000793},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41442549228668213},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.41090449690818787},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.37135860323905945},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.13072118163108826},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.12233638763427734},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2016.7918450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2016.7918450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W127438004","https://openalex.org/W1582784770","https://openalex.org/W1600143047","https://openalex.org/W1718173834","https://openalex.org/W2006065004","https://openalex.org/W2031180920","https://openalex.org/W2045738181","https://openalex.org/W2066006378","https://openalex.org/W2070265857","https://openalex.org/W2088160102","https://openalex.org/W2110332474","https://openalex.org/W2127494401","https://openalex.org/W2153252552","https://openalex.org/W2167702024","https://openalex.org/W6605109535","https://openalex.org/W6684552912"],"related_works":["https://openalex.org/W169399214","https://openalex.org/W1914543332","https://openalex.org/W1984347656","https://openalex.org/W4391272374","https://openalex.org/W2465421051","https://openalex.org/W2161314515","https://openalex.org/W4200068392","https://openalex.org/W2184371793","https://openalex.org/W2537969829","https://openalex.org/W4312742405"],"abstract_inverted_index":{"This":[0],"paper":[1],"reveals":[2],"the":[3,26,50,56,111,116],"correlations":[4],"between":[5],"discourse":[6,17,22,103],"structure":[7,23,92],"and":[8,11,54],"acoustic":[9],"parameters":[10],"presents":[12],"a":[13],"method":[14],"of":[15,28],"manipulating":[16],"prosody":[18],"in":[19],"relation":[20],"to":[21,24],"improve":[25,132],"naturalness":[27],"synthesis":[29,113],"speech.":[30],"The":[31,37,61,106],"text":[32],"material":[33],"included":[34],"1229":[35],"passages.":[36],"texts":[38],"were":[39,47,59,71,100],"annotated":[40],"using":[41],"Rhetorical":[42],"Structure":[43],"Theory.":[44],"Prosody":[45],"measurements":[46],"extracted":[48],"from":[49],"corresponding":[51],"speech":[52,96,114,130],"annotation":[53,135],"then":[55],"statistic":[57],"analysis":[58],"conducted.":[60],"results":[62,108],"showed":[63],"that:":[64],"1)":[65],"segments":[66,78],"at":[67],"higher":[68],"hierarchical":[69],"level":[70],"preceded":[72],"with":[73,102],"longer":[74,82],"pause":[75],"durations;":[76],"2)":[77],"bearing":[79],"nucleus":[80],"possessed":[81],"average":[83],"duration":[84],"than":[85],"satellites":[86],"did.":[87],"To":[88],"test":[89],"if":[90],"rhetorical":[91],"would":[93,131],"benefit":[94],"synthesized":[95,101,129],"prosody,":[97],"15":[98],"passages":[99],"features":[104,127],"implemented.":[105],"evaluation":[107],"indicated":[109],"that":[110,124],"modified":[112],"excelled":[115],"baseline":[117],"system":[118],"by":[119],"0.1":[120],"MOS":[121],"point,":[122],"suggesting":[123],"implementing":[125],"prosodic":[126],"into":[128],"overall":[133],"prosody.speech":[134]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
