{"id":"https://openalex.org/W6888513919","doi":"https://doi.org/10.21227/wpxz-3c67","title":"MTC-VC: A Multi-Task Contrastive Learning Method for Efficient and Controllable Voice Cloning","display_name":"MTC-VC: A Multi-Task Contrastive Learning Method for Efficient and Controllable Voice Cloning","publication_year":2025,"publication_date":"2025-04-07","ids":{"openalex":"https://openalex.org/W6888513919","doi":"https://doi.org/10.21227/wpxz-3c67"},"language":"en","primary_location":{"id":"doi:10.21227/wpxz-3c67","is_oa":true,"landing_page_url":"https://doi.org/10.21227/wpxz-3c67","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.21227/wpxz-3c67","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhou, Rui","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156189","display_name":"Shanghai Dianji University","ror":"https://ror.org/055fene14","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210156189"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhou, Rui","raw_affiliation_strings":["School of Design and Art, Shanghai Dianji University"],"affiliations":[{"raw_affiliation_string":"School of Design and Art, Shanghai Dianji University","institution_ids":["https://openalex.org/I4210156189"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210156189"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.36980000138282776},{"id":"https://openalex.org/keywords/ranging","display_name":"Ranging","score":0.36970001459121704},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.3546000123023987},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3481999933719635},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.33230000734329224},{"id":"https://openalex.org/keywords/acoustic-phonetics","display_name":"Acoustic phonetics","score":0.3188000023365021}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7196999788284302},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6894000172615051},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42809998989105225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3970000147819519},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.36980000138282776},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.36970001459121704},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3546000123023987},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3481999933719635},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.32919999957084656},{"id":"https://openalex.org/C2781032008","wikidata":"https://www.wikidata.org/wiki/Q424083","display_name":"Acoustic phonetics","level":3,"score":0.3188000023365021},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.3003999888896942},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.2782999873161316},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21227/wpxz-3c67","is_oa":true,"landing_page_url":"https://doi.org/10.21227/wpxz-3c67","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.21227/wpxz-3c67","is_oa":true,"landing_page_url":"https://doi.org/10.21227/wpxz-3c67","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[{"score":0.6155976057052612,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,13,60],"LibriSpeech":[1],"corpus,":[2],"a":[3,42],"publicly":[4],"available":[5],"English":[6],"speech":[7,23,65],"dataset":[8,61],"derived":[9],"from":[10,24,55],"audiobook":[11],"recordings.":[12],"corpus":[14],"contains":[15],"approximately":[16],"1,000":[17],"hours":[18],"of":[19,39,44],"16":[20],"kHz":[21],"read":[22],"over":[25],"2,400":[26],"speakers,":[27],"encompassing":[28],"diverse":[29],"speaking":[30],"styles,":[31],"rates,":[32],"and":[33,74],"regional":[34],"accents.":[35],"For":[36],"the":[37],"purpose":[38],"contrastive":[40],"learning,":[41],"subset":[43],"100":[45],"speakers":[46],"was":[47],"sampled,":[48],"with":[49],"20":[50],"utterances":[51],"per":[52],"speaker":[53,70],"ranging":[54],"3":[56],"to":[57],"10":[58],"seconds.":[59],"provides":[62],"clean,":[63],"labeled":[64],"suitable":[66],"for":[67],"tasks":[68],"involving":[69],"representation,":[71],"acoustic":[72],"modeling,":[73],"multi-style":[75],"synthesis.":[76]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
