{"id":"https://openalex.org/W3016151952","doi":"https://doi.org/10.1109/icassp40776.2020.9054107","title":"Breathing and Speech Planning in Spontaneous Speech Synthesis","display_name":"Breathing and Speech Planning in Spontaneous Speech Synthesis","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3016151952","doi":"https://doi.org/10.1109/icassp40776.2020.9054107","mag":"3016151952"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054107","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-283731","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063795282","display_name":"\u00c9va Sz\u00e9kely","orcid":"https://orcid.org/0000-0003-1175-840X"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Eva Szekely","raw_affiliation_strings":["Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091432739","display_name":"Gustav Eje Henter","orcid":"https://orcid.org/0000-0002-1643-1054"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Gustav Eje Henter","raw_affiliation_strings":["Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088064508","display_name":"Jonas Beskow","orcid":"https://orcid.org/0000-0003-1399-6604"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Jonas Beskow","raw_affiliation_strings":["Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015417808","display_name":"Joakim Gustafson","orcid":"https://orcid.org/0000-0002-0397-6442"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Joakim Gustafson","raw_affiliation_strings":["Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Division of Speech, Music and Hearing, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063795282"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":1.9884,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.8895836,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7649","last_page":"7653"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.8164608478546143},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7982921600341797},{"id":"https://openalex.org/keywords/impromptu","display_name":"Impromptu","score":0.6070859432220459},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5795984268188477},{"id":"https://openalex.org/keywords/breathing","display_name":"Breathing","score":0.5715150833129883},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.5634971261024475},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5144705772399902},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.4627300500869751},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4463846683502197},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4394015371799469},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.26222458481788635},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09560507535934448}],"concepts":[{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.8164608478546143},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7982921600341797},{"id":"https://openalex.org/C2781454322","wikidata":"https://www.wikidata.org/wiki/Q6007730","display_name":"Impromptu","level":2,"score":0.6070859432220459},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5795984268188477},{"id":"https://openalex.org/C39300077","wikidata":"https://www.wikidata.org/wiki/Q9530","display_name":"Breathing","level":2,"score":0.5715150833129883},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.5634971261024475},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5144705772399902},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.4627300500869751},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4463846683502197},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4394015371799469},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.26222458481788635},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09560507535934448},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054107","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:DiVA.org:kth-283731","is_oa":true,"landing_page_url":"http://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-283731","pdf_url":null,"source":{"id":"https://openalex.org/S4306401559","display_name":"KTH Publication Database DiVA (KTH Royal Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference paper"}],"best_oa_location":{"id":"pmh:oai:DiVA.org:kth-283731","is_oa":true,"landing_page_url":"http://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-283731","pdf_url":null,"source":{"id":"https://openalex.org/S4306401559","display_name":"KTH Publication Database DiVA (KTH Royal Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference paper"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W183448665","https://openalex.org/W1784795041","https://openalex.org/W1964420823","https://openalex.org/W1966894979","https://openalex.org/W1981663330","https://openalex.org/W1992245461","https://openalex.org/W2092244451","https://openalex.org/W2103133081","https://openalex.org/W2114656113","https://openalex.org/W2116064496","https://openalex.org/W2120847449","https://openalex.org/W2147760966","https://openalex.org/W2176828964","https://openalex.org/W2578321825","https://openalex.org/W2614550605","https://openalex.org/W2736470227","https://openalex.org/W2749434195","https://openalex.org/W2791686384","https://openalex.org/W2808058646","https://openalex.org/W2901285216","https://openalex.org/W2937881934","https://openalex.org/W2946691456","https://openalex.org/W2964243274","https://openalex.org/W2967220154","https://openalex.org/W2970431018","https://openalex.org/W2972359262","https://openalex.org/W2972606665","https://openalex.org/W2973177710","https://openalex.org/W4301493784","https://openalex.org/W6638343645","https://openalex.org/W6675925002","https://openalex.org/W6732215457","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W2241334379","https://openalex.org/W2779947354","https://openalex.org/W2583925611","https://openalex.org/W286530742","https://openalex.org/W3005278806","https://openalex.org/W1560904020","https://openalex.org/W3020951519","https://openalex.org/W3173084154","https://openalex.org/W2334448532","https://openalex.org/W4376849848"],"abstract_inverted_index":{"Breathing":[0],"and":[1,31,100,128],"speech":[2,6,16,112],"planning":[3],"in":[4,102],"spontaneous":[5,111],"are":[7],"coordinated":[8],"processes,":[9],"often":[10],"exhibiting":[11],"disfluent":[12,41],"patterns.":[13],"While":[14],"synthetic":[15,38],"is":[17],"not":[18],"subject":[19],"to":[20,67,134],"respiratory":[21],"needs,":[22],"integrating":[23],"breath":[24,81,143],"into":[25,70],"synthesis":[26],"has":[27],"advantages":[28],"for":[29],"naturalness":[30],"recall.":[32],"At":[33],"the":[34,46,89,131,138,145],"same":[35],"time,":[36],"a":[37,61,84,115],"voice":[39],"reproducing":[40],"breathing":[42,118],"patterns":[43],"learned":[44],"from":[45],"data":[47],"can":[48],"be":[49,135],"problematic.":[50],"To":[51],"address":[52],"this,":[53],"we":[54,73],"first":[55],"propose":[56],"training":[57],"stochastic":[58],"TTS":[59],"on":[60,123],"corpus":[62],"of":[63,79,91],"overlapping":[64],"breath-group":[65],"bigrams,":[66],"take":[68],"context":[69],"account.":[71],"Next,":[72],"introduce":[74],"an":[75,109],"unsupervised":[76],"automatic":[77],"annotation":[78,106],"likely-disfluent":[80],"events,":[82],"through":[83],"product-of-experts":[85],"model":[86],"that":[87],"combines":[88],"output":[90],"two":[92,124],"breath-":[93],"event":[94],"predictors,":[95],"each":[96],"using":[97],"complementary":[98],"information":[99],"operating":[101],"opposite":[103],"directions.":[104],"This":[105],"enables":[107],"creating":[108],"automatically-breathing":[110],"synthesiser":[113],"with":[114],"more":[116],"fluent":[117],"style.":[119],"A":[120],"subjective":[121],"evaluation":[122],"spoken":[125],"genres":[126],"(impromptu":[127],"rehearsed)":[129],"found":[130],"proposed":[132],"system":[133],"preferred":[136],"over":[137],"baseline":[139],"approach":[140],"treating":[141],"all":[142],"events":[144],"same.":[146]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
