{"id":"https://openalex.org/W4388579593","doi":"https://doi.org/10.1109/taslp.2023.3331813","title":"Joint Multiscale Cross-Lingual Speaking Style Transfer With Bidirectional Attention Mechanism for Automatic Dubbing","display_name":"Joint Multiscale Cross-Lingual Speaking Style Transfer With Bidirectional Attention Mechanism for Automatic Dubbing","publication_year":2023,"publication_date":"2023-11-10","ids":{"openalex":"https://openalex.org/W4388579593","doi":"https://doi.org/10.1109/taslp.2023.3331813"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3331813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3331813","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047338075","display_name":"Jingbei Li","orcid":"https://orcid.org/0000-0002-6284-5979"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Jingbei Li","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054180274","display_name":"Sipan Li","orcid":"https://orcid.org/0000-0003-3322-0547"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Sipan Li","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100633279","display_name":"Ping Chen","orcid":"https://orcid.org/0009-0006-4445-4399"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Ping Chen","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101861149","display_name":"Luwen Zhang","orcid":"https://orcid.org/0000-0003-3576-6252"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Luwen Zhang","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100619865","display_name":"Meng Yi","orcid":"https://orcid.org/0009-0006-8288-4226"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Yi Meng","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102869280","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0001-8533-0524"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019458385","display_name":"Helen Meng","orcid":"https://orcid.org/0000-0002-4427-3532"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Helen Meng","raw_affiliation_strings":["Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong, SAR, China"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103162279","display_name":"Qiao Tian","orcid":"https://orcid.org/0000-0002-4078-1273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao Tian","raw_affiliation_strings":["ByteDance, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100339106","display_name":"Yu\u2010Ping Wang","orcid":"https://orcid.org/0000-0001-9340-5864"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuping Wang","raw_affiliation_strings":["ByteDance, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance, Shanghai, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103854502","display_name":"Yuxuan Wang","orcid":"https://orcid.org/0000-0001-8269-3354"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuxuan Wang","raw_affiliation_strings":["ByteDance, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance, Shanghai, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5047338075"],"corresponding_institution_ids":["https://openalex.org/I889458895","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.8741,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.79342816,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"517","last_page":"528"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7642408609390259},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7356824278831482},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.6237553954124451},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.528292179107666},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.5123027563095093},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.5068476796150208},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47705379128456116},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4673148989677429},{"id":"https://openalex.org/keywords/first-language","display_name":"First language","score":0.45638397336006165},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.4463212490081787},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3795158267021179},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.36533528566360474}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7642408609390259},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7356824278831482},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.6237553954124451},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.528292179107666},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.5123027563095093},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.5068476796150208},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47705379128456116},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4673148989677429},{"id":"https://openalex.org/C171041071","wikidata":"https://www.wikidata.org/wiki/Q36870","display_name":"First language","level":2,"score":0.45638397336006165},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.4463212490081787},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3795158267021179},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.36533528566360474},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3331813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3331813","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2603945996","display_name":null,"funder_award_id":"62076144","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8460738343","display_name":null,"funder_award_id":"WDZC20220816140515001","funder_id":"https://openalex.org/F4320326705","funder_display_name":"Science, Technology and Innovation Commission of Shenzhen Municipality"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326705","display_name":"Science, Technology and Innovation Commission of Shenzhen Municipality","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W206967138","https://openalex.org/W1570629387","https://openalex.org/W1731081199","https://openalex.org/W2105594594","https://openalex.org/W2129142580","https://openalex.org/W2157331557","https://openalex.org/W2507912506","https://openalex.org/W2519091744","https://openalex.org/W2747874407","https://openalex.org/W2903739847","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2972425358","https://openalex.org/W2972473628","https://openalex.org/W2972495969","https://openalex.org/W2973034126","https://openalex.org/W2973043900","https://openalex.org/W2973084242","https://openalex.org/W3037838322","https://openalex.org/W3084396630","https://openalex.org/W3160329778","https://openalex.org/W3161049737","https://openalex.org/W3197704090","https://openalex.org/W4210777104","https://openalex.org/W4211186907","https://openalex.org/W4224931655","https://openalex.org/W4224935349","https://openalex.org/W4287854499","https://openalex.org/W4297841267","https://openalex.org/W4297841628","https://openalex.org/W4304013787","https://openalex.org/W4385822556","https://openalex.org/W6623517193","https://openalex.org/W6637618735","https://openalex.org/W6639480849","https://openalex.org/W6675365184","https://openalex.org/W6679434410","https://openalex.org/W6739901393","https://openalex.org/W6750489868","https://openalex.org/W6755207826","https://openalex.org/W6766978945","https://openalex.org/W6778823374","https://openalex.org/W6783867762","https://openalex.org/W6796464841","https://openalex.org/W6847477388"],"related_works":["https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W2038083449","https://openalex.org/W3177678247","https://openalex.org/W1999617572","https://openalex.org/W2944572343","https://openalex.org/W2333799855","https://openalex.org/W2351687372","https://openalex.org/W2004087835","https://openalex.org/W2055951479"],"abstract_inverted_index":{"Automatic":[0],"dubbing,":[1],"which":[2,92,217],"generates":[3],"a":[4,108,174,219],"corresponding":[5],"version":[6],"of":[7,83,213],"the":[8,32,39,43,47,53,56,71,74,80,99,119,129,156,163,193,197,211],"input":[9],"speech":[10,195],"in":[11,18,42,60,147,162,225],"another":[12],"language,":[13],"can":[14],"be":[15],"widely":[16],"utilized":[17,153,190],"many":[19],"real-world":[20],"scenarios,":[21],"such":[22,86],"as":[23,87],"video":[24],"and":[25,76,90,101,135,143,152,158,173,200,227],"game":[26],"localization.":[27],"In":[28,103],"addition":[29],"to":[30,46,50,96,116,154,191],"synthesizing":[31],"translated":[33],"scripts,":[34],"automatic":[35,66],"dubbing":[36,67],"further":[37],"transfers":[38],"speaking":[40,59,77,84,112,121,145,160,184,202],"style":[41,113,122],"original":[44],"language":[45,149,165],"dubbed":[48],"speeches":[49],"give":[51],"audiences":[52],"impression":[54],"that":[55],"characters":[57,100],"are":[58,93,150],"their":[61],"native":[62],"tongue.":[63],"However,":[64],"state-of-the-art":[65],"systems":[68],"only":[69,222],"model":[70,118],"transfer":[72,114,123,224],"on":[73],"duration":[75,223],"rate,":[78],"disregarding":[79],"other":[81,164],"aspects":[82],"style,":[85],"emotion,":[88],"intonation":[89],"emphasis,":[91],"also":[94],"crucial":[95],"fully":[97],"understand":[98],"speech.":[102],"this":[104],"paper,":[105],"we":[106],"propose":[107],"joint":[109],"multiscale":[110,183],"cross-lingual":[111],"framework":[115,169],"simultaneously":[117],"bidirectional":[120,176],"between":[124],"two":[125],"languages":[126],"at":[127],"both":[128,180],"global":[130,142,157,199],"scale":[131,137],"(i.e.,":[132,138],"utterance":[133],"level)":[134],"local":[136,144,159,201],"word":[139],"level).":[140],"The":[141,207],"styles":[146,161,203],"each":[148,171,205],"extracted":[151],"predict":[155],"with":[166,196,221],"an":[167],"encoder-decoder":[168],"for":[170,179,204],"direction":[172],"shared":[175],"attention":[177],"mechanism":[178],"directions.":[181],"A":[182],"style-enhanced":[185],"FastSpeech":[186],"2":[187],"is":[188],"then":[189],"synthesize":[192],"desired":[194],"predicted":[198],"language.":[206],"experimental":[208],"results":[209],"demonstrate":[210],"effectiveness":[212],"our":[214],"proposed":[215],"framework,":[216],"outperforms":[218],"baseline":[220],"objective":[226],"subjective":[228],"evaluations.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
