{"id":"https://openalex.org/W4392902718","doi":"https://doi.org/10.1109/icassp48485.2024.10446806","title":"Dialog Modeling in Audiobook Synthesis","display_name":"Dialog Modeling in Audiobook Synthesis","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902718","doi":"https://doi.org/10.1109/icassp48485.2024.10446806"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446806","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446806","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074874599","display_name":"Cheng-chieh Yeh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Cheng-Chieh Yeh","raw_affiliation_strings":["Apple,USA","Apple, USA"],"affiliations":[{"raw_affiliation_string":"Apple,USA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"Apple, USA","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039967308","display_name":"Amirreza Shirani","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amirreza Shirani","raw_affiliation_strings":["Apple,USA","Apple, USA"],"affiliations":[{"raw_affiliation_string":"Apple,USA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"Apple, USA","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107912452","display_name":"Weicheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weicheng Zhang","raw_affiliation_strings":["Apple,USA","Apple, USA"],"affiliations":[{"raw_affiliation_string":"Apple,USA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"Apple, USA","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000571465","display_name":"Tuomo Raitio","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tuomo Raitio","raw_affiliation_strings":["Apple,USA","Apple, USA"],"affiliations":[{"raw_affiliation_string":"Apple,USA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"Apple, USA","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015798689","display_name":"Ramya Rasipuram","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ramya Rasipuram","raw_affiliation_strings":["Apple,USA","Apple, USA"],"affiliations":[{"raw_affiliation_string":"Apple,USA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"Apple, USA","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010570444","display_name":"Ladan Golipour","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ladan Golipour","raw_affiliation_strings":["Apple,USA","Apple, USA"],"affiliations":[{"raw_affiliation_string":"Apple,USA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"Apple, USA","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064317196","display_name":"David Winarsky","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Winarsky","raw_affiliation_strings":["Apple,USA","Apple, USA"],"affiliations":[{"raw_affiliation_string":"Apple,USA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"Apple, USA","institution_ids":["https://openalex.org/I4210153776"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5074874599"],"corresponding_institution_ids":["https://openalex.org/I4210153776"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02582316,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"13341","last_page":"13345"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.9140714406967163},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7298141717910767},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6115901470184326},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5638257265090942},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5350314974784851},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.498950719833374},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49387627840042114},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4473086893558502},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.137457937002182},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.13575059175491333},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.0895625650882721}],"concepts":[{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.9140714406967163},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7298141717910767},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6115901470184326},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5638257265090942},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5350314974784851},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.498950719833374},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49387627840042114},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4473086893558502},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.137457937002182},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.13575059175491333},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0895625650882721}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446806","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446806","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1490960179","https://openalex.org/W1513874326","https://openalex.org/W2788357188","https://openalex.org/W2801608439","https://openalex.org/W2885800352","https://openalex.org/W2889141918","https://openalex.org/W2904459034","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2967220154","https://openalex.org/W3045691107","https://openalex.org/W3097892637","https://openalex.org/W3115920274","https://openalex.org/W3160329778","https://openalex.org/W3202098869","https://openalex.org/W4210684870","https://openalex.org/W4283771593","https://openalex.org/W4296068776","https://openalex.org/W4296068797","https://openalex.org/W4385823032","https://openalex.org/W4385993905","https://openalex.org/W6703420464","https://openalex.org/W6746700228","https://openalex.org/W6748409065","https://openalex.org/W6750489868","https://openalex.org/W6755207826","https://openalex.org/W6778823374","https://openalex.org/W6779068807","https://openalex.org/W6838831139"],"related_works":["https://openalex.org/W2098987383","https://openalex.org/W2417260800","https://openalex.org/W1596203174","https://openalex.org/W2117933979","https://openalex.org/W2283130723","https://openalex.org/W103938586","https://openalex.org/W2104718772","https://openalex.org/W4233992201","https://openalex.org/W1977846844","https://openalex.org/W2368721880"],"abstract_inverted_index":{"In":[0,19],"audiobook":[1,28,106],"synthesis,":[2],"it":[3],"is":[4,43,67,143],"important":[5],"to":[6,10,45,69,74,145],"have":[7],"the":[8,55,70,79,94,102,147,150],"ability":[9],"differentiate":[11],"between":[12],"dialog":[13,24,40,49,64,130],"and":[14,52,60],"narration":[15,47],"or":[16],"different":[17],"characters.":[18],"this":[20],"work,":[21],"we":[22,90],"propose":[23],"modeling":[25],"methods":[26],"for":[27],"synthesis.":[29],"The":[30,121],"proposed":[31,103,151],"approach":[32,104],"consists":[33],"of":[34,96,126,149],"two":[35],"stages.":[36],"First,":[37],"a":[38,63,83,97,109,138],"text-based":[39],"style":[41,65,88],"classifier":[42],"employed":[44],"predict":[46,54],"vs.":[48],"from":[50],"text,":[51],"further":[53],"corresponding":[56,80],"characters":[57],"into":[58],"soprano":[59],"baritone.":[61],"Then,":[62],"adaptor":[66],"added":[68],"text-to-speech":[71],"(TTS)":[72],"model":[73],"allow":[75],"synthesizing":[76],"speech":[77],"with":[78,108],"styles.":[81],"With":[82],"speaker":[84],"verification":[85],"(SV)":[86],"based":[87],"adaptor,":[89],"can":[91],"even":[92],"control":[93],"strength":[95],"given":[98],"style.":[99],"We":[100],"evaluated":[101],"in":[105,134],"synthesis":[107],"mean":[110],"opinion":[111],"score":[112],"(MOS)":[113],"listening":[114],"test":[115,142],"using":[116],"9":[117],"carefully":[118],"designed":[119],"questions.":[120],"results":[122],"show":[123],"an":[124],"improvement":[125],"0.35":[127],"MOS":[128,140],"on":[129],"distinction":[131],"without":[132],"degradation":[133],"other":[135],"aspects.":[136],"Also":[137],"comparative":[139],"(CMOS)":[141],"conducted":[144],"verify":[146],"effectiveness":[148],"method.":[152]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
