{"id":"https://openalex.org/W4297841509","doi":"https://doi.org/10.21437/interspeech.2022-388","title":"Transplantation of Conversational Speaking Style with Interjections in Sequence-to-Sequence Speech Synthesis","display_name":"Transplantation of Conversational Speaking Style with Interjections in Sequence-to-Sequence Speech Synthesis","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4297841509","doi":"https://doi.org/10.21437/interspeech.2022-388"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-388","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-388","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105985451","display_name":"Ra\u00fal Fern\u00e1ndez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raul Fernandez","raw_affiliation_strings":["IBM Research AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049318367","display_name":"David Haws","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Haws","raw_affiliation_strings":["IBM Research AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008672977","display_name":"Guy Lorberbom","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guy Lorberbom","raw_affiliation_strings":["IBM Research AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035109881","display_name":"Slava Shechtman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Slava Shechtman","raw_affiliation_strings":["IBM Research AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029658066","display_name":"A. S. Sorin","orcid":"https://orcid.org/0000-0002-9775-2115"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexander Sorin","raw_affiliation_strings":["IBM Research AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4153,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.58335542,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"5488","last_page":"5492"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.917900025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.917900025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.7220929861068726},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5944077968597412},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.5663368701934814},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5326098203659058},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5225752592086792},{"id":"https://openalex.org/keywords/transplantation","display_name":"Transplantation","score":0.5091299414634705},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46483349800109863},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.40833139419555664},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37307876348495483},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.11336511373519897},{"id":"https://openalex.org/keywords/literature","display_name":"Literature","score":0.09692293405532837},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.09167766571044922},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.08988499641418457},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.06866323947906494},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.062416404485702515}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.7220929861068726},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5944077968597412},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.5663368701934814},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5326098203659058},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5225752592086792},{"id":"https://openalex.org/C2911091166","wikidata":"https://www.wikidata.org/wiki/Q106419912","display_name":"Transplantation","level":2,"score":0.5091299414634705},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46483349800109863},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.40833139419555664},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37307876348495483},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.11336511373519897},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.09692293405532837},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.09167766571044922},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.08988499641418457},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.06866323947906494},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.062416404485702515},{"id":"https://openalex.org/C141071460","wikidata":"https://www.wikidata.org/wiki/Q40821","display_name":"Surgery","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-388","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-388","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1731081199","https://openalex.org/W2039143993","https://openalex.org/W2515028311","https://openalex.org/W2547875792","https://openalex.org/W2548228487","https://openalex.org/W2604184139","https://openalex.org/W2691405956","https://openalex.org/W2888797456","https://openalex.org/W2963091184","https://openalex.org/W2964243274","https://openalex.org/W2972606665","https://openalex.org/W2985067290","https://openalex.org/W3022876224","https://openalex.org/W3091928890","https://openalex.org/W3095012670","https://openalex.org/W3101882441","https://openalex.org/W3140429000","https://openalex.org/W3144667183","https://openalex.org/W3151309757","https://openalex.org/W3160799772","https://openalex.org/W3161492781","https://openalex.org/W3163143169","https://openalex.org/W3193700177","https://openalex.org/W3198712562","https://openalex.org/W4295731579","https://openalex.org/W4298326265"],"related_works":["https://openalex.org/W2611614995","https://openalex.org/W2368651715","https://openalex.org/W2789919619","https://openalex.org/W1552159754","https://openalex.org/W3107474891","https://openalex.org/W4206358631","https://openalex.org/W2794438528","https://openalex.org/W3161695192","https://openalex.org/W2938833595","https://openalex.org/W2267427430"],"abstract_inverted_index":{"Sequence-to-Sequence":[0],"Text-to-Speech":[1],"architectures":[2],"that":[3],"directly":[4],"generate":[5],"low":[6],"level":[7],"acoustic":[8],"features":[9],"from":[10,37,66],"phonetic":[11],"sequences":[12],"are":[13],"known":[14],"to":[15,41],"produce":[16],"natural":[17],"and":[18,32,54,75,82],"expressive":[19],"speech":[20],"when":[21,90],"provided":[22],"with":[23,72,92,109],"adequate":[24],"amounts":[25],"of":[26,86,98],"training":[27],"data.Such":[28],"systems":[29],"can":[30],"learn":[31],"transfer":[33,65,89,108],"desired":[34],"speaking":[35],"styles":[36],"one":[38],"seen":[39],"speaker":[40],"another":[42],"(in":[43],"multi-style":[44],"multi-speaker":[45],"settings),":[46],"which":[47],"is":[48],"highly":[49],"desirable":[50],"for":[51],"creating":[52],"scalable":[53],"customizable":[55],"Human-Computer":[56],"Interaction":[57],"systems.In":[58],"this":[59,102],"work":[60],"we":[61],"explore":[62,83],"one-to-many":[63],"style":[64,73,88,107],"a":[67,96,113],"dedicated":[68],"single-speaker":[69],"conversational":[70],"corpus":[71,80],"nuances":[74],"interjections.We":[76],"elaborate":[77],"on":[78],"the":[79,84],"design":[81],"feasibility":[85],"such":[87],"assisted":[91],"Voice-Conversion-based":[93],"data":[94],"augmentation.In":[95],"set":[97],"subjective":[99],"listening":[100],"experiments,":[101],"approach":[103],"resulted":[104],"in":[105,123],"high-fidelity":[106],"no":[110],"quality":[111],"degradation.However,":[112],"certain":[114],"voice":[115,124],"persona":[116],"shift":[117],"was":[118],"observed,":[119],"requiring":[120],"further":[121],"improvements":[122],"conversion.":[125]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
