{"id":"https://openalex.org/W4296068791","doi":"https://doi.org/10.21437/interspeech.2022-444","title":"J-MAC: Japanese multi-speaker audiobook corpus for speech synthesis","display_name":"J-MAC: Japanese multi-speaker audiobook corpus for speech synthesis","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4296068791","doi":"https://doi.org/10.21437/interspeech.2022-444"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-444","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-444","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068962900","display_name":"Wataru Nakata","orcid":"https://orcid.org/0000-0003-3953-6534"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Wataru Nakata","raw_affiliation_strings":["The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059290592","display_name":"Naoko Tanji","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Naoko Tanji","raw_affiliation_strings":["The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003814223","display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Saruwatari","raw_affiliation_strings":["The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5013050263"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.5197,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.63386124,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2358","last_page":"2362"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9496999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9132999777793884,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7550864219665527},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7080386877059937},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.514151394367218},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39464491605758667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3251799941062927}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7550864219665527},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7080386877059937},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.514151394367218},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39464491605758667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3251799941062927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-444","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-444","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5099999904632568,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W37526647","https://openalex.org/W72347498","https://openalex.org/W1516533146","https://openalex.org/W2015687745","https://openalex.org/W2251812144","https://openalex.org/W2805064398","https://openalex.org/W2903739847","https://openalex.org/W2952218014","https://openalex.org/W2963971656","https://openalex.org/W2964243274","https://openalex.org/W2972359262","https://openalex.org/W2975429091","https://openalex.org/W2996286887","https://openalex.org/W3008691130","https://openalex.org/W3026362080","https://openalex.org/W3033411150","https://openalex.org/W3043783436","https://openalex.org/W3092028330","https://openalex.org/W3097892637","https://openalex.org/W3129651364","https://openalex.org/W3162746464","https://openalex.org/W3163339651","https://openalex.org/W3196027980","https://openalex.org/W4287117449","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W1596801655","https://openalex.org/W3204019825"],"abstract_inverted_index":{"In":[0],"this":[1,56],"paper,":[2],"we":[3,58,90],"construct":[4],"a":[5,36,60,64],"Japanese":[6],"audiobook":[7,49,123,133],"speech":[8,13,20,33,50,124,134],"corpus":[9,65],"called":[10],"\"J-MAC\"":[11],"for":[12],"synthesis":[14,34,125],"research.With":[15],"the":[16,22,53,84,113,128],"success":[17],"of":[18,62],"reading-style":[19,44],"synthesis,":[21],"research":[23],"target":[24],"is":[25,35,115],"shifting":[26],"to":[27,94,101,111],"tasks":[28],"that":[29,39],"use":[30,91],"complicated":[31],"contexts.Audiobook":[32],"good":[37],"example":[38],"requires":[40],"cross-sentence,":[41],"expressiveness,":[42],"etc.Unlike":[43],"speech,":[45],"speaker-specific":[46],"expressiveness":[47],"in":[48,117],"also":[51,121],"becomes":[52],"context.To":[54],"enhance":[55],"research,":[57],"propose":[59],"method":[61,78],"constructing":[63],"from":[66],"audiobooks":[67,73],"read":[68],"by":[69],"professional":[70],"speakers.From":[71],"many":[72],"and":[74,82,105,107,127],"their":[75],"texts,":[76],"our":[77,118],"can":[79],"automatically":[80],"extract":[81,95],"refine":[83,112],"data":[85],"without":[86],"any":[87],"language":[88],"dependency.Specifically,":[89],"vocal-instrumental":[92],"separation":[93],"clean":[96],"data,":[97],"connectionist":[98],"temporal":[99],"classification":[100],"roughly":[102],"align":[103],"text":[104],"audio,":[106],"voice":[108],"activity":[109],"detection":[110],"alignment.J-MAC":[114],"open-sourced":[116],"project":[119],"page.We":[120],"conduct":[122],"evaluations,":[126],"results":[129],"give":[130],"insights":[131],"into":[132],"synthesis.":[135]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
