{"id":"https://openalex.org/W4283067649","doi":"https://doi.org/10.21437/interspeech.2022-403","title":"Acoustic Modeling for End-to-End Empathetic Dialogue Speech Synthesis Using Linguistic and Prosodic Contexts of Dialogue History","display_name":"Acoustic Modeling for End-to-End Empathetic Dialogue Speech Synthesis Using Linguistic and Prosodic Contexts of Dialogue History","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4283067649","doi":"https://doi.org/10.21437/interspeech.2022-403"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-403","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-403","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102497394","display_name":"Yuto Nishimura","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yuto Nishimura","raw_affiliation_strings":["The University of Tokyo, Japan,"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan,","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022417197","display_name":"Yuki Saito","orcid":"https://orcid.org/0000-0002-2297-7205"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki Saito","raw_affiliation_strings":["The University of Tokyo, Japan,"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan,","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["The University of Tokyo, Japan,"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan,","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100925892","display_name":"Kentaro Tachibana","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kentaro Tachibana","raw_affiliation_strings":["LINE Corp., Japan"],"affiliations":[{"raw_affiliation_string":"LINE Corp., Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003814223","display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Saruwatari","raw_affiliation_strings":["The University of Tokyo, Japan,"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan,","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102497394"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.7303,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.70312666,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3373","last_page":"3377"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6574501395225525},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5952873826026917},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5837646722793579},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.521918773651123},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4574093818664551},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2644078731536865},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.057931989431381226}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6574501395225525},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5952873826026917},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5837646722793579},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.521918773651123},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4574093818664551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2644078731536865},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.057931989431381226}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-403","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-403","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7200000286102295}],"awards":[{"id":"https://openalex.org/G4391777248","display_name":"Sustainably Developable Speech Synthesis Based on Continual Learning","funder_award_id":"21K21305","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320322832","display_name":"University of Tokyo","ror":"https://ror.org/057zh3y96"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1585085236","https://openalex.org/W2091804523","https://openalex.org/W2471520273","https://openalex.org/W2519091744","https://openalex.org/W2519648275","https://openalex.org/W2889394771","https://openalex.org/W2896457183","https://openalex.org/W2901997113","https://openalex.org/W2951583236","https://openalex.org/W2963609956","https://openalex.org/W2963975282","https://openalex.org/W2964243274","https://openalex.org/W2972802841","https://openalex.org/W3030437843","https://openalex.org/W3033411150","https://openalex.org/W3036601975","https://openalex.org/W3081800019","https://openalex.org/W3092028330","https://openalex.org/W3116376974","https://openalex.org/W3121541553","https://openalex.org/W3150572638","https://openalex.org/W3151309757","https://openalex.org/W3161782335","https://openalex.org/W3197861989","https://openalex.org/W3198152857","https://openalex.org/W4205679356","https://openalex.org/W4242300465","https://openalex.org/W4308170756","https://openalex.org/W4313140281"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3179968364","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2151749779","https://openalex.org/W1999612375","https://openalex.org/W2001405890","https://openalex.org/W2382290278"],"abstract_inverted_index":{"We":[0],"propose":[1],"an":[2,70],"end-to-end":[3],"empathetic":[4,35,81,162],"dialogue":[5,19,46,62,76,114,152],"speech":[6,95,125,160,175],"synthesis":[7],"(DSS)":[8],"model":[9,48,83,91],"that":[10,144,178],"considers":[11],"both":[12],"the":[13,22,30,52,73,80,106,113,151,157,180],"linguistic":[14,55],"and":[15,34,56,124,127,164,169],"prosodic":[16,148],"contexts":[17,149],"of":[18,54,72,105,150,159],"history.Empathy":[20],"is":[21,37,49],"active":[23],"attempt":[24],"by":[25,51,112,179],"humans":[26],"to":[27,40,109,121,132],"get":[28],"inside":[29],"interlocutor":[31],"in":[32,44,161],"dialogue,":[33],"DSS":[36,82,163],"a":[38,88,98,102,118,129],"technology":[39],"implement":[41],"this":[42],"act":[43],"spoken":[45],"systems.Our":[47],"conditioned":[50],"history":[53,77,153],"prosody":[57,103,135],"features":[58],"for":[59],"predicting":[60],"appropriate":[61],"context.As":[63],"such,":[64],"it":[65],"can":[66],"be":[67,110],"regarded":[68],"as":[69],"extension":[71],"conventional":[74,181],"linguistic-feature-based":[75],"modeling.To":[78],"train":[79],"effectively,":[84],"we":[85],"investigate":[86],"1)":[87,145],"self-supervised":[89],"learning":[90],"pretrained":[92],"with":[93],"large":[94],"corpora,":[96],"2)":[97,165],"style-guided":[99,167],"training":[100,168],"using":[101],"embedding":[104,131,171],"current":[107],"utterance":[108],"predicted":[111],"context":[115],"embedding,":[116],"3)":[117],"cross-modal":[119],"attention":[120],"combine":[122],"text":[123],"modalities,":[126],"4)":[128],"sentence-wise":[130,170],"achieve":[133],"fine-grained":[134],"modeling":[136,172],"rather":[137],"than":[138,177],"utterancewise":[139],"modeling.The":[140],"evaluation":[141],"results":[142],"demonstrate":[143],"simply":[146],"considering":[147],"does":[154],"not":[155],"improve":[156],"quality":[158,176],"introducing":[166],"achieves":[173],"higher":[174],"method.":[182]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
