{"id":"https://openalex.org/W3206945595","doi":"https://doi.org/10.1145/3474085.3475198","title":"Face-based Voice Conversion: Learning the Voice behind a Face","display_name":"Face-based Voice Conversion: Learning the Voice behind a Face","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3206945595","doi":"https://doi.org/10.1145/3474085.3475198","mag":"3206945595"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475198","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475198","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003549856","display_name":"Hsiao-Han Lu","orcid":"https://orcid.org/0000-0002-6977-8342"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Hsiao-Han Lu","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108739951","display_name":"Shao-En Weng","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Shao-En Weng","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013754167","display_name":"Ya-Fan Yen","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ya-Fan Yen","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040050806","display_name":"Hong-Han Shuai","orcid":"https://orcid.org/0000-0003-2216-077X"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hong-Han Shuai","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000780442","display_name":"Wen-Huang Cheng","orcid":"https://orcid.org/0000-0002-4662-7875"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wen-Huang Cheng","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan Roc","institution_ids":["https://openalex.org/I148366613"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5003549856"],"corresponding_institution_ids":["https://openalex.org/I148366613"],"apc_list":null,"apc_paid":null,"fwci":1.2189,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.79567275,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"496","last_page":"505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8445441722869873},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.6523486971855164},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.646806001663208},{"id":"https://openalex.org/keywords/facial-recognition-system","display_name":"Facial recognition system","score":0.42985332012176514},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4071761667728424},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.35490697622299194},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.29975199699401855}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8445441722869873},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.6523486971855164},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.646806001663208},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.42985332012176514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4071761667728424},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.35490697622299194},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.29975199699401855},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475198","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475198","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1581697293","https://openalex.org/W1826234144","https://openalex.org/W2041823554","https://openalex.org/W2056852181","https://openalex.org/W2076055233","https://openalex.org/W2096733369","https://openalex.org/W2124331435","https://openalex.org/W2187089797","https://openalex.org/W2341528187","https://openalex.org/W2396025094","https://openalex.org/W2403098732","https://openalex.org/W2406026380","https://openalex.org/W2527729766","https://openalex.org/W2747744257","https://openalex.org/W2887437849","https://openalex.org/W2897663715","https://openalex.org/W2899877258","https://openalex.org/W2902070858","https://openalex.org/W2937579788","https://openalex.org/W2962788625","https://openalex.org/W2963539064","https://openalex.org/W2963839617","https://openalex.org/W2964069186","https://openalex.org/W2964243274","https://openalex.org/W2970903655","https://openalex.org/W2972659941","https://openalex.org/W2979157532","https://openalex.org/W2981985754","https://openalex.org/W3000145882","https://openalex.org/W3015434413","https://openalex.org/W3015734344","https://openalex.org/W3015826515","https://openalex.org/W3016243847","https://openalex.org/W3020895012","https://openalex.org/W3034257218","https://openalex.org/W3035626590","https://openalex.org/W3035753399","https://openalex.org/W3093010840","https://openalex.org/W3093077034","https://openalex.org/W3093411628","https://openalex.org/W3096524539","https://openalex.org/W3101481642","https://openalex.org/W3101998545","https://openalex.org/W3110013267","https://openalex.org/W3123318516","https://openalex.org/W3124479915","https://openalex.org/W4212774754","https://openalex.org/W4289665794"],"related_works":["https://openalex.org/W3188962172","https://openalex.org/W2772917594","https://openalex.org/W4312825515","https://openalex.org/W4306742369","https://openalex.org/W4303457083","https://openalex.org/W304855073","https://openalex.org/W2131146434","https://openalex.org/W2951359407","https://openalex.org/W4376623224","https://openalex.org/W2384651879"],"abstract_inverted_index":{"Zero-shot":[0],"voice":[1,34,55,69,83,142,170],"conversion":[2,143,171],"(VC)":[3],"trained":[4,96],"by":[5],"non-parallel":[6],"data":[7],"has":[8],"gained":[9],"a":[10,39,47,63,139,151],"lot":[11],"of":[12,66],"attention":[13],"in":[14],"recent":[15],"years.":[16],"Previous":[17],"methods":[18],"usually":[19],"extract":[20],"speaker":[21,101],"embeddings":[22],"from":[23,57,71],"audios":[24,110],"and":[25,45,109,123,156],"use":[26],"them":[27],"for":[28],"converting":[29],"the":[30,86,92,104,116,120,126,160,165,174,184],"voices":[31],"into":[32],"different":[33,68,72],"styles.":[35],"Since":[36],"there":[37],"is":[38,89],"strong":[40],"relationship":[41,88],"between":[42],"human":[43,73],"faces":[44],"voices,":[46],"promising":[48],"approach":[49],"would":[50],"be":[51,181],"to":[52,130,173],"synthesize":[53],"various":[54],"characteristics":[56],"face":[58,74,176],"representation.":[59],"Therefore,":[60],"we":[61,137],"introduce":[62],"novel":[64,140],"idea":[65],"generating":[67],"styles":[70],"photos,":[75,102],"which":[76],"can":[77,180],"facilitate":[78],"new":[79],"applications,":[80],"e.g.,":[81],"personalized":[82],"assistants.":[84],"However,":[85],"audio-visual":[87],"implicit.":[90],"Moreover,":[91],"existing":[93],"VCs":[94],"are":[95,111],"on":[97,125,159,183],"laboratory-collected":[98],"datasets":[99,105],"without":[100],"while":[103],"with":[106,119,150],"both":[107],"photos":[108],"in-the-wild":[112,127],"datasets.":[113],"Directly":[114],"replacing":[115],"target":[117,121,175],"audio":[118],"photo":[122],"training":[124,153],"dataset":[128,162],"leads":[129],"noisy":[131],"results.":[132],"To":[133],"address":[134],"these":[135],"issues,":[136],"propose":[138],"many-to-many":[141],"network,":[144],"namely":[145],"Face-based":[146],"Voice":[147],"Conversion":[148],"(FaceVC),":[149],"3-stage":[152],"strategy.":[154],"Quantitative":[155],"qualitative":[157],"experiments":[158],"LRS3-Ted":[161],"show":[163],"that":[164],"proposed":[166],"FaceVC":[167],"successfully":[168],"performs":[169],"according":[172],"photos.":[177],"Audio":[178],"samples":[179],"found":[182],"demo":[185],"website":[186],"at":[187],"https://facevc.github.io/.":[188]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
