{"id":"https://openalex.org/W4372260474","doi":"https://doi.org/10.1109/icassp49357.2023.10095751","title":"DailyTalk: Spoken Dialogue Dataset for Conversational Text-to-Speech","display_name":"DailyTalk: Spoken Dialogue Dataset for Conversational Text-to-Speech","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260474","doi":"https://doi.org/10.1109/icassp49357.2023.10095751"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095751","is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095751","pdf_url":"https://ieeexplore.ieee.org/ielx7/10094559/10094560/10095751.pdf","source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/10094559/10094560/10095751.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011377269","display_name":"Keon Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Keon Lee","raw_affiliation_strings":["KAIST,School of Computing,Rep. of Korea","KRAFTON Inc, Rep. of Korea","School of Computing, KAIST, Rep. of Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,School of Computing,Rep. of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"KRAFTON Inc, Rep. of Korea","institution_ids":[]},{"raw_affiliation_string":"School of Computing, KAIST, Rep. of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026121157","display_name":"Kyumin Park","orcid":"https://orcid.org/0000-0001-6511-8972"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kyumin Park","raw_affiliation_strings":["KAIST,School of Computing,Rep. of Korea","School of Computing, KAIST, Rep. of Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,School of Computing,Rep. of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"School of Computing, KAIST, Rep. of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100412739","display_name":"Daeyoung Kim","orcid":"https://orcid.org/0000-0002-7960-5955"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Daeyoung Kim","raw_affiliation_strings":["KAIST,School of Computing,Rep. of Korea","School of Computing, KAIST, Rep. of Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,School of Computing,Rep. of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"School of Computing, KAIST, Rep. of Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5011377269"],"corresponding_institution_ids":["https://openalex.org/I157485424"],"apc_list":null,"apc_paid":null,"fwci":5.5614,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.96726448,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.8973615765571594},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7948476672172546},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6230680346488953},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5275019407272339},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5202590227127075},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4505225121974945},{"id":"https://openalex.org/keywords/license","display_name":"License","score":0.42293471097946167},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.42003822326660156},{"id":"https://openalex.org/keywords/mit-license","display_name":"MIT License","score":0.41028571128845215}],"concepts":[{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.8973615765571594},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7948476672172546},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6230680346488953},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5275019407272339},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5202590227127075},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4505225121974945},{"id":"https://openalex.org/C2780560020","wikidata":"https://www.wikidata.org/wiki/Q79719","display_name":"License","level":2,"score":0.42293471097946167},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.42003822326660156},{"id":"https://openalex.org/C174183944","wikidata":"https://www.wikidata.org/wiki/Q334661","display_name":"MIT License","level":3,"score":0.41028571128845215},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095751","is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095751","pdf_url":"https://ieeexplore.ieee.org/ielx7/10094559/10094560/10095751.pdf","source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1109/icassp49357.2023.10095751","is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095751","pdf_url":"https://ieeexplore.ieee.org/ielx7/10094559/10094560/10095751.pdf","source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4372260474.pdf","grobid_xml":"https://content.openalex.org/works/W4372260474.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W2747874407","https://openalex.org/W2963686995","https://openalex.org/W2964243274","https://openalex.org/W2972359262","https://openalex.org/W3151309757","https://openalex.org/W3194208059","https://openalex.org/W3196001064","https://openalex.org/W3198104520","https://openalex.org/W4295036296","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6745297980","https://openalex.org/W6778823374","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W2036021480","https://openalex.org/W2546377002","https://openalex.org/W4285209474","https://openalex.org/W22571951","https://openalex.org/W2046541848","https://openalex.org/W2791776396","https://openalex.org/W4384009753","https://openalex.org/W2964820744","https://openalex.org/W4247858078","https://openalex.org/W2378944243"],"abstract_inverted_index":{"The":[0,108],"majority":[1],"of":[2,10,51],"current":[3],"Text-to-Speech":[4],"(TTS)":[5],"datasets,":[6],"which":[7],"are":[8,114],"collections":[9],"individual":[11],"utterances,":[12],"contain":[13],"few":[14],"conversational":[15,25,30],"aspects.":[16],"In":[17],"this":[18],"paper,":[19],"we":[20,54,84],"introduce":[21],"DailyTalk,":[22],"a":[23,62,71,92],"high-quality":[24],"speech":[26],"dataset":[27,43,110],"designed":[28],"for":[29,117],"TTS.":[31],"We":[32],"sampled,":[33],"modified,":[34],"and":[35,80,96,111],"recorded":[36],"2,541":[37],"dialogues":[38],"from":[39,106],"the":[40,74],"open-domain":[41],"dialogue":[42],"DailyDialog":[44],"inheriting":[45],"its":[46],"annotated":[47],"attributes.":[48],"On":[49],"top":[50],"our":[52,59,81,100],"dataset,":[53,95],"extend":[55],"prior":[56],"work":[57],"as":[58,91],"baseline,":[60],"where":[61],"non-autoregressive":[63],"TTS":[64,94],"is":[65],"conditioned":[66],"on":[67],"historical":[68],"information":[69,105],"in":[70],"dialogue.":[72],"From":[73],"baseline":[75,101,112],"experiment":[76],"with":[77,120],"both":[78],"general":[79,93],"novel":[82],"metrics,":[83],"show":[85],"that":[86],"DailyTalk":[87,109],"can":[88,102],"be":[89],"used":[90],"more":[97],"than":[98],"that,":[99],"represent":[103],"contextual":[104],"DailyTalk.":[107],"code":[113],"freely":[115],"available":[116],"academic":[118],"use":[119],"CC-BY-SA":[121],"4.0":[122],"license":[123],"<sup":[124],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[125],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[126],".":[127]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
