{"id":"https://openalex.org/W4225300652","doi":"https://doi.org/10.1109/icassp43922.2022.9747837","title":"Enhancing Speaking Styles in Conversational Text-to-Speech Synthesis with Graph-Based Multi-Modal Context Modeling","display_name":"Enhancing Speaking Styles in Conversational Text-to-Speech Synthesis with Graph-Based Multi-Modal Context Modeling","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225300652","doi":"https://doi.org/10.1109/icassp43922.2022.9747837"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747837","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047338075","display_name":"Jingbei Li","orcid":"https://orcid.org/0000-0002-6284-5979"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Jingbei Li","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100940419","display_name":"Yi Meng","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Yi Meng","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054487597","display_name":"Chenyi Li","orcid":"https://orcid.org/0000-0001-8294-1880"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Chenyi Li","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102869280","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0001-8533-0524"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University,Shenzhen,China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019458385","display_name":"Helen Meng","orcid":"https://orcid.org/0000-0002-4427-3532"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Helen Meng","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Systems Engineering and Engineering Management,Hong Kong SAR, China","Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Systems Engineering and Engineering Management,Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106404246","display_name":"Chao Weng","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Weng","raw_affiliation_strings":["AI Lab, Tencent,Shenzhen,China","AI Lab, Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"AI Lab, Tencent,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"AI Lab, Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075183307","display_name":"Dan Su","orcid":"https://orcid.org/0000-0001-5746-9545"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Su","raw_affiliation_strings":["AI Lab, Tencent,Shenzhen,China","AI Lab, Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"AI Lab, Tencent,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"AI Lab, Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5047338075"],"corresponding_institution_ids":["https://openalex.org/I889458895","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.2031,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.89569642,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7917","last_page":"7921"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8314700126647949},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7693915367126465},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.6777129769325256},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5868678689002991},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5526655912399292},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5105735063552856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5034670233726501},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.472968727350235},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.46029719710350037},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4547748267650604},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.29884856939315796}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8314700126647949},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7693915367126465},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.6777129769325256},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5868678689002991},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5526655912399292},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5105735063552856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5034670233726501},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.472968727350235},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.46029719710350037},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4547748267650604},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.29884856939315796},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747837","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7599999904632568}],"awards":[],"funders":[{"id":"https://openalex.org/F4320316083","display_name":"Tencent","ror":"https://ror.org/00hhjss72"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322392","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1731081199","https://openalex.org/W2001538905","https://openalex.org/W2157331557","https://openalex.org/W2794490148","https://openalex.org/W2795109282","https://openalex.org/W2890964092","https://openalex.org/W2896457183","https://openalex.org/W2952436057","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2985882473","https://openalex.org/W3015282541","https://openalex.org/W3021636956","https://openalex.org/W3033411150","https://openalex.org/W3092028330","https://openalex.org/W3102725307","https://openalex.org/W3146550708","https://openalex.org/W3151309757","https://openalex.org/W3197704090","https://openalex.org/W3198152857","https://openalex.org/W4295312788","https://openalex.org/W4295731579","https://openalex.org/W4385245566","https://openalex.org/W6637618735","https://openalex.org/W6736996214","https://openalex.org/W6739901393","https://openalex.org/W6750489868","https://openalex.org/W6755207826","https://openalex.org/W6764398373","https://openalex.org/W6766978945","https://openalex.org/W6778823374","https://openalex.org/W6783867762"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W1968552888","https://openalex.org/W2374116601","https://openalex.org/W3093134843","https://openalex.org/W1511346092","https://openalex.org/W1527532029","https://openalex.org/W2529301793","https://openalex.org/W2378167147","https://openalex.org/W3210777354","https://openalex.org/W3133700904"],"abstract_inverted_index":{"Comparing":[0],"with":[1,14,38],"traditional":[2],"text-to-speech":[3],"(TTS)":[4],"systems,":[5],"conversational":[6,21,29,78,96,186],"TTS":[7,30,97,187],"systems":[8],"are":[9,116,135],"required":[10],"to":[11,19,95,98,122,143,164],"synthesize":[12],"speeches":[13],"proper":[15],"speaking":[16,60,101,110,146],"style":[17,111,147],"confirming":[18],"the":[20,33,51,59,63,100,107,114,124,144,172,180],"context.":[22],"However,":[23],"state-of-the-art":[24,181],"context":[25,37,89,115,182],"modeling":[26,50,76,90,183],"methods":[27,45],"in":[28,36,49,54,75,113,129,185,188],"only":[31],"model":[32,123],"textual":[34,108],"information":[35,112],"a":[39,86],"recurrent":[40],"neural":[41],"network":[42],"(RNN).":[43],"Such":[44],"have":[46],"limited":[47],"ability":[48],"inter-speaker":[52],"influence":[53,128],"conversations,":[55],"and":[56,62,72,92,109,118,126,141,157,162,191],"also":[57],"neglect":[58],"styles":[61,102],"intra-speaker":[64,127],"inertia":[65],"inside":[66],"each":[67],"speaker.":[68],"Inspired":[69],"by":[70,120,138],"DialogueGCN":[71,121,134],"its":[73],"superiority":[74],"such":[77],"influences":[79],"than":[80],"RNN":[81],"based":[82],"approaches,":[83],"we":[84],"propose":[85],"graph-based":[87],"multi-modal":[88],"method":[91,184],"adopt":[93],"it":[94],"enhance":[99],"of":[103,133,174],"synthesized":[104],"speeches.":[105],"Both":[106],"extracted":[117],"processed":[119],"inter-":[125],"conversations.":[130],"The":[131],"outputs":[132],"then":[136],"summarized":[137],"attention":[139],"mechanism,":[140],"converted":[142],"enhanced":[145],"for":[148,159],"current":[149],"utterance.":[150],"An":[151],"English":[152],"conversation":[153],"corpus":[154,170],"is":[155],"collected":[156],"annotated":[158],"our":[160,175],"research":[161],"released":[163],"public.":[165],"Experiment":[166],"results":[167],"on":[168],"this":[169],"demonstrate":[171],"effectiveness":[173],"proposed":[176],"approach,":[177],"which":[178],"outperforms":[179],"both":[189],"MOS":[190],"ABX":[192],"preference":[193],"rate.":[194]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
