{"id":"https://openalex.org/W4391021780","doi":"https://doi.org/10.1109/asru57964.2023.10389693","title":"COCO-NUT: Corpus of Japanese Utterance and Voice Characteristics Description for Prompt-Based Control","display_name":"COCO-NUT: Corpus of Japanese Utterance and Voice Characteristics Description for Prompt-Based Control","publication_year":2023,"publication_date":"2023-12-16","ids":{"openalex":"https://openalex.org/W4391021780","doi":"https://doi.org/10.1109/asru57964.2023.10389693"},"language":"en","primary_location":{"id":"doi:10.1109/asru57964.2023.10389693","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389693","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008391815","display_name":"Aya Watanabe","orcid":"https://orcid.org/0000-0003-3123-489X"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Aya Watanabe","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083394213","display_name":"Yuki Saito","orcid":"https://orcid.org/0000-0002-7967-2613"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki Saito","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068962900","display_name":"Wataru Nakata","orcid":"https://orcid.org/0000-0003-3953-6534"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Wataru Nakata","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072398606","display_name":"Detai Xin","orcid":"https://orcid.org/0009-0007-1908-1137"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Detai Xin","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003814223","display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Saruwatari","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5008391815"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.9803,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.79906015,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8358206748962402},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.6628778576850891},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6401933431625366},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6004272699356079},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.535376250743866},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5290253162384033},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5266406536102295},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.5058534145355225},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5046960115432739},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4854305386543274},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4694015085697174},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.414814829826355},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.13339954614639282}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8358206748962402},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.6628778576850891},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6401933431625366},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6004272699356079},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.535376250743866},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5290253162384033},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5266406536102295},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.5058534145355225},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5046960115432739},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4854305386543274},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4694015085697174},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.414814829826355},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.13339954614639282},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru57964.2023.10389693","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389693","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7799999713897705,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322832","display_name":"University of Tokyo","ror":"https://ror.org/057zh3y96"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1581272224","https://openalex.org/W2016381774","https://openalex.org/W2787685498","https://openalex.org/W2800448574","https://openalex.org/W2886641317","https://openalex.org/W2890964092","https://openalex.org/W2896457183","https://openalex.org/W2913668833","https://openalex.org/W2952122856","https://openalex.org/W2965373594","https://openalex.org/W2972359262","https://openalex.org/W2972473628","https://openalex.org/W3015591594","https://openalex.org/W3024308166","https://openalex.org/W3034775979","https://openalex.org/W3081800019","https://openalex.org/W3097892637","https://openalex.org/W3129576130","https://openalex.org/W3160743249","https://openalex.org/W3193828514","https://openalex.org/W3196475561","https://openalex.org/W3198694222","https://openalex.org/W3198791321","https://openalex.org/W3209059054","https://openalex.org/W4224929761","https://openalex.org/W4226442948","https://openalex.org/W4226502599","https://openalex.org/W4293575120","https://openalex.org/W4303440777","https://openalex.org/W4311000453","https://openalex.org/W4318351475","https://openalex.org/W4319166718","https://openalex.org/W4372183465","https://openalex.org/W4372260310","https://openalex.org/W4372266552","https://openalex.org/W4372346311","https://openalex.org/W4375869257","https://openalex.org/W4398152753","https://openalex.org/W6755207826","https://openalex.org/W6766673545","https://openalex.org/W6790978476","https://openalex.org/W6791353385","https://openalex.org/W6810629572","https://openalex.org/W6838439425","https://openalex.org/W6841982715","https://openalex.org/W6845281891","https://openalex.org/W6847363464","https://openalex.org/W6848729602","https://openalex.org/W6849105126","https://openalex.org/W6849798658"],"related_works":["https://openalex.org/W187174317","https://openalex.org/W4200068392","https://openalex.org/W2772686614","https://openalex.org/W2036933852","https://openalex.org/W1835173349","https://openalex.org/W23627563","https://openalex.org/W2152945827","https://openalex.org/W114226241","https://openalex.org/W2015513221","https://openalex.org/W301864623"],"abstract_inverted_index":{"In":[0],"text-to-speech,":[1],"controlling":[2],"voice":[3,33,43,85],"characteristics":[4,86],"is":[5,63],"important":[6],"in":[7],"achieving":[8],"various-purpose":[9],"speech":[10],"synthesis.":[11],"Considering":[12],"the":[13,104,121],"success":[14],"of":[15,32,39,95,99],"text-conditioned":[16],"generation,":[17],"such":[18,51],"as":[19],"text-to-image,":[20],"free-form":[21,47,84],"text":[22,81],"instruction":[23],"should":[24],"be":[25],"useful":[26],"for":[27],"intuitive":[28],"and":[29,41,83,109],"complicated":[30],"control":[31,52],"characteristics.":[34],"A":[35],"sufficiently":[36],"large":[37],"corpus":[38,58,74,93,119],"high-quality":[40],"diverse":[42,76],"samples":[44],"with":[45,80],"corresponding":[46],"descriptions":[48],"can":[49],"advance":[50],"research.":[53],"However,":[54],"neither":[55],"an":[56],"open":[57],"nor":[59],"a":[60,72],"scalable":[61],"method":[62],"currently":[64],"available.":[65],"To":[66],"this":[67,92],"end,":[68],"we":[69,116],"develop":[70],"Coco-Nut,":[71],"new":[73],"including":[75],"Japanese":[77],"utterances,":[78],"along":[79],"transcriptions":[82],"descriptions.":[87],"Our":[88],"methodology":[89],"to":[90],"construct":[91],"consists":[94],"1)":[96],"automatic":[97],"collection":[98],"voice-related":[100],"audio":[101],"data":[102],"from":[103],"Internet,":[105],"2)":[106],"quality":[107],"assurance,":[108],"3)":[110],"manual":[111],"annotation":[112],"using":[113],"crowdsourcing.":[114],"Additionally,":[115],"benchmark":[117],"our":[118],"on":[120],"prompt":[122],"embedding":[123],"model":[124],"trained":[125],"by":[126],"contrastive":[127],"speech-text":[128],"learning.":[129]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
