{"id":"https://openalex.org/W2964073455","doi":"https://doi.org/10.1109/icassp.2019.8683872","title":"Audiovisual Speaker Conversion: Jointly and Simultaneously Transforming Facial Expression and Acoustic Characteristics","display_name":"Audiovisual Speaker Conversion: Jointly and Simultaneously Transforming Facial Expression and Acoustic Characteristics","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2964073455","doi":"https://doi.org/10.1109/icassp.2019.8683872","mag":"2964073455"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8683872","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/2e0d2942-b778-4733-814a-09e5ea9f21c3","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012466506","display_name":"Fuming Fang","orcid":"https://orcid.org/0000-0002-9332-3735"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Fuming Fang","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044556342","display_name":"Isao Echizen","orcid":"https://orcid.org/0000-0003-4908-1860"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Isao Echizen","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5012466506"],"corresponding_institution_ids":["https://openalex.org/I184597095"],"apc_list":null,"apc_paid":null,"fwci":0.9951,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.76095495,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"6795","last_page":"6799"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7615102529525757},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6888395547866821},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.557191014289856},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.5521772503852844},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.5481330752372742},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5143888592720032},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4774651825428009},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4231756329536438},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06176096200942993}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7615102529525757},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6888395547866821},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.557191014289856},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.5521772503852844},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.5481330752372742},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5143888592720032},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4774651825428009},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4231756329536438},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06176096200942993},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp.2019.8683872","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/2e0d2942-b778-4733-814a-09e5ea9f21c3","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/2e0d2942-b778-4733-814a-09e5ea9f21c3","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Fang, F, Wang, X, Yamagishi, J & Echizen, I 2019, Audiovisual Speaker Conversion: Jointly and Simultaneously Transforming Facial Expression and Acoustic Characteristics. in ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Institute of Electrical and Electronics Engineers, Brighton, United Kingdom, pp. 6795-6799, 44th International Conference on Acoustics, Speech, and Signal Processing, Brighton , United Kingdom, 12/05/19. https://doi.org/10.1109/ICASSP.2019.8683872","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/2e0d2942-b778-4733-814a-09e5ea9f21c3","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/portal/en/publications/audiovisual-speaker-conversion-jointly-and-simultaneously-transforming-facial-expression-and-acoustic-characteristics(2e0d2942-b778-4733-814a-09e5ea9f21c3).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Fang, F, Wang, X, Yamagishi, J & Echizen, I 2019, Audiovisual Speaker Conversion: Jointly and Simultaneously Transforming Facial Expression and Acoustic Characteristics. in ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Institute of Electrical and Electronics Engineers, Brighton, United Kingdom, pp. 6795-6799, 44th International Conference on Acoustics, Speech, and Signal Processing, Brighton , United Kingdom, 12/05/19. https://doi.org/10.1109/ICASSP.2019.8683872","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/2e0d2942-b778-4733-814a-09e5ea9f21c3","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/2e0d2942-b778-4733-814a-09e5ea9f21c3","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Fang, F, Wang, X, Yamagishi, J & Echizen, I 2019, Audiovisual Speaker Conversion: Jointly and Simultaneously Transforming Facial Expression and Acoustic Characteristics. in ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Institute of Electrical and Electronics Engineers, Brighton, United Kingdom, pp. 6795-6799, 44th International Conference on Acoustics, Speech, and Signal Processing, Brighton , United Kingdom, 12/05/19. https://doi.org/10.1109/ICASSP.2019.8683872","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320338075","display_name":"Core Research for Evolutional Science and Technology","ror":"https://ror.org/00097mb19"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1485981043","https://openalex.org/W1523385540","https://openalex.org/W1546593297","https://openalex.org/W1686810756","https://openalex.org/W1836465849","https://openalex.org/W1921523184","https://openalex.org/W2002774878","https://openalex.org/W2013996527","https://openalex.org/W2112796928","https://openalex.org/W2194775991","https://openalex.org/W2293078015","https://openalex.org/W2301937176","https://openalex.org/W2486034530","https://openalex.org/W2519091744","https://openalex.org/W2559085405","https://openalex.org/W2593414223","https://openalex.org/W2737658251","https://openalex.org/W2738406145","https://openalex.org/W2771039075","https://openalex.org/W2782422271","https://openalex.org/W2806315375","https://openalex.org/W2886022419","https://openalex.org/W2886232760","https://openalex.org/W2887437849","https://openalex.org/W2889455726","https://openalex.org/W2949382160","https://openalex.org/W2962835968","https://openalex.org/W2963840672","https://openalex.org/W2964069186","https://openalex.org/W2964171275","https://openalex.org/W3101481642","https://openalex.org/W6631216910","https://openalex.org/W6637373629","https://openalex.org/W6638667902","https://openalex.org/W6640185926","https://openalex.org/W6696085341","https://openalex.org/W6746717610","https://openalex.org/W6748181857"],"related_works":["https://openalex.org/W4297807400","https://openalex.org/W1491159402","https://openalex.org/W4313854686","https://openalex.org/W2249138175","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2029134149","https://openalex.org/W2368768466","https://openalex.org/W2757081366"],"abstract_inverted_index":{"An":[0],"audiovisual":[1,115],"speaker":[2,18,51],"conversion":[3,63],"method":[4,121],"is":[5],"presented":[6],"for":[7,35,47],"simultaneously":[8],"transforming":[9],"the":[10,26,36,48,69,80,84,105,119],"facial":[11,27,40,70,86,99,134],"expressions":[12,41],"and":[13,28,39,46,54,67,71,87,90,133],"voice":[14,38],"of":[15,21,110],"a":[16,22,62,74],"source":[17],"into":[19],"those":[20],"target":[23,50],"speaker.":[24],"Transforming":[25],"acoustic":[29,72,88,132],"features":[30],"together":[31],"makes":[32],"it":[33],"possible":[34],"converted":[37,85,106],"to":[42,52],"be":[43],"highly":[44],"correlated":[45],"generated":[49],"appear":[53],"sound":[55],"natural.":[56],"It":[57],"uses":[58],"three":[59],"neural":[60],"networks:":[61],"network":[64,77,94],"that":[65,78,95,118,129],"fuses":[66],"transforms":[68],"features,":[73,89],"waveform":[75,81],"generation":[76],"produces":[79],"from":[82],"both":[83,104],"an":[91,97,113],"image":[92,100],"reconstruction":[93],"outputs":[96],"RGB":[98],"also":[101],"based":[102],"on":[103],"features.":[107,135],"The":[108],"results":[109],"experiments":[111],"using":[112],"emotional":[114],"database":[116],"showed":[117],"proposed":[120],"achieved":[122],"significantly":[123],"higher":[124],"naturalness":[125],"compared":[126],"with":[127],"one":[128],"separately":[130],"transformed":[131]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
