{"id":"https://openalex.org/W4297841267","doi":"https://doi.org/10.21437/interspeech.2022-11223","title":"Towards Cross-speaker Reading Style Transfer on Audiobook Dataset","display_name":"Towards Cross-speaker Reading Style Transfer on Audiobook Dataset","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4297841267","doi":"https://doi.org/10.21437/interspeech.2022-11223"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-11223","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11223","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100330993","display_name":"Xiang Li","orcid":"https://orcid.org/0000-0001-5471-1236"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Xiang Li","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","institution_ids":["https://openalex.org/I889458895"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048029954","display_name":"Changhe Song","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Changhe Song","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","institution_ids":["https://openalex.org/I889458895"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078723603","display_name":"Xianhao Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Xianhao Wei","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","Department of Computer Science and Technology, Tsinghua University, Beijing, China","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","institution_ids":["https://openalex.org/I889458895"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102869280","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0001-8533-0524"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","institution_ids":["https://openalex.org/I889458895"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100405572","display_name":"Jia Jia","orcid":"https://orcid.org/0000-0002-7336-4003"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Jia Jia","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","institution_ids":["https://openalex.org/I889458895"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019458385","display_name":"Helen Meng","orcid":"https://orcid.org/0000-0002-4427-3532"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Helen Meng","raw_affiliation_strings":["Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems,","institution_ids":["https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100330993"],"corresponding_institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I889458895","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.5218,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.63842077,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"5528","last_page":"5532"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6752266883850098},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.6347288489341736},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.6020782589912415},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5218283534049988},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3855312168598175},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3741310238838196},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.16336223483085632},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.054986268281936646}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6752266883850098},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.6347288489341736},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.6020782589912415},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5218283534049988},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3855312168598175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3741310238838196},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.16336223483085632},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.054986268281936646},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-11223","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11223","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8399999737739563}],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2123497280","display_name":null,"funder_award_id":"202106","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2603945996","display_name":null,"funder_award_id":"62076144","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2981938667","display_name":null,"funder_award_id":"Shenzhen","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2794490148","https://openalex.org/W2896457183","https://openalex.org/W2932022923","https://openalex.org/W2963609956","https://openalex.org/W2964138190","https://openalex.org/W2964243274","https://openalex.org/W2973158936","https://openalex.org/W3092028330","https://openalex.org/W3094785744","https://openalex.org/W3161113899","https://openalex.org/W3162794600","https://openalex.org/W3206725777","https://openalex.org/W3213227271","https://openalex.org/W4221154745","https://openalex.org/W4287236468","https://openalex.org/W4295731579","https://openalex.org/W4297914850","https://openalex.org/W4319586674"],"related_works":["https://openalex.org/W2356229341","https://openalex.org/W2082438799","https://openalex.org/W2349768204","https://openalex.org/W1966986837","https://openalex.org/W2360138227","https://openalex.org/W4365808155","https://openalex.org/W1838455177","https://openalex.org/W1489909378","https://openalex.org/W2392697679","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Cross-speaker":[0],"style":[1,8,33,37,45,66,72,88,108,118,139],"transfer":[2,38,135],"aims":[3,81],"to":[4,35,82,90,122,134,140],"extract":[5],"the":[6,10,19,54,70,93,97,110,115,123,131,144,154,157],"speech":[7],"of":[9,21,125,146,156],"given":[11,137],"reference":[12],"speech,":[13],"which":[14],"can":[15],"be":[16],"reproduced":[17],"in":[18,100,160],"timbre":[20,106,124],"arbitrary":[22],"target":[23,142],"speakers.Existing":[24],"methods":[25],"on":[26],"this":[27],"topic":[28],"have":[29],"explored":[30],"utilizing":[31],"utterance-level":[32,65],"labels":[34],"perform":[36],"via":[39],"either":[40],"global":[41,58,94,150],"or":[42],"local":[43,55,98,147],"scale":[44],"representations.However,":[46],"audiobook":[47,101,162],"datasets":[48],"are":[49,61],"typically":[50],"characterized":[51],"by":[52,64,103],"both":[53,92],"prosody":[56,99,148],"and":[57,60,96,107,149],"genre,":[59],"rarely":[62],"accompanied":[63],"labels.Thus,":[67],"properly":[68],"transferring":[69],"reading":[71,117,138],"across":[73],"different":[74,126],"speakers":[75],"remains":[76],"a":[77,84,136],"challenging":[78],"task.This":[79],"paper":[80],"introduce":[83],"chunk-wise":[85],"multi-scale":[86],"cross-speaker":[87],"model":[89,132],"capture":[91],"genre":[95,151],"speeches.Moreover,":[102],"disentangling":[104],"speaker":[105],"with":[109],"proposed":[111,158],"switchable":[112],"adversarial":[113],"classifiers,":[114],"extracted":[116],"is":[119,164],"made":[120],"adaptable":[121],"speakers.Experiment":[127],"results":[128],"confirm":[129],"that":[130],"manages":[133],"new":[141],"speakers.With":[143],"support":[145],"type":[152],"predictor,":[153],"potentiality":[155],"method":[159],"multi-speaker":[161],"generation":[163],"further":[165],"revealed.":[166]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
