{"id":"https://openalex.org/W4375868939","doi":"https://doi.org/10.1109/icassp49357.2023.10094807","title":"Wavsyncswap: End-To-End Portrait-Customized Audio-Driven Talking Face Generation","display_name":"Wavsyncswap: End-To-End Portrait-Customized Audio-Driven Talking Face Generation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375868939","doi":"https://doi.org/10.1109/icassp49357.2023.10094807"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10094807","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094807","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036821828","display_name":"Weihong Bao","orcid":"https://orcid.org/0009-0008-4696-854X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weihong Bao","raw_affiliation_strings":["Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101849127","display_name":"Liyang Chen","orcid":"https://orcid.org/0000-0001-6985-8281"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liyang Chen","raw_affiliation_strings":["Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089469089","display_name":"Chaoyong Zhou","orcid":"https://orcid.org/0000-0001-8531-0945"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaoyong Zhou","raw_affiliation_strings":["Ping An Technology,Shenzhen,China","Ping An Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Ping An Technology,Shenzhen,China","institution_ids":["https://openalex.org/I4401726822"]},{"raw_affiliation_string":"Ping An Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112200211","display_name":"Sicheng Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sicheng Yang","raw_affiliation_strings":["Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102869280","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0001-8533-0524"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","The Chinese University of Hong Kong, Hong Kong SAR, China","Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5036821828"],"corresponding_institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.4913,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.6369637,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.783139169216156},{"id":"https://openalex.org/keywords/sync","display_name":"sync","score":0.6247168779373169},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5095572471618652},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4758822023868561},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45869889855384827},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4392741918563843},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.42759209871292114},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.40727171301841736},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.22148025035858154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.783139169216156},{"id":"https://openalex.org/C3913047","wikidata":"https://www.wikidata.org/wiki/Q1956265","display_name":"sync","level":3,"score":0.6247168779373169},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5095572471618652},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4758822023868561},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45869889855384827},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4392741918563843},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.42759209871292114},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40727171301841736},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.22148025035858154},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10094807","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094807","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6800000071525574,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2162220380","https://openalex.org/W2237250383","https://openalex.org/W2593414223","https://openalex.org/W2603777577","https://openalex.org/W2604379605","https://openalex.org/W2808631503","https://openalex.org/W2890952074","https://openalex.org/W2944294033","https://openalex.org/W2963073614","https://openalex.org/W2963800363","https://openalex.org/W2963839617","https://openalex.org/W2969985801","https://openalex.org/W3019952993","https://openalex.org/W3034301684","https://openalex.org/W3081492798","https://openalex.org/W3092709185","https://openalex.org/W3097792222","https://openalex.org/W3101631197","https://openalex.org/W3109114891","https://openalex.org/W3191243633","https://openalex.org/W3195529437","https://openalex.org/W3197199219","https://openalex.org/W3197199501","https://openalex.org/W4200174933","https://openalex.org/W4287026043","https://openalex.org/W4301206121","https://openalex.org/W4312911690","https://openalex.org/W6765779288","https://openalex.org/W6799942944","https://openalex.org/W6810221379"],"related_works":["https://openalex.org/W4392306433","https://openalex.org/W2063547430","https://openalex.org/W2524993630","https://openalex.org/W2607244263","https://openalex.org/W2234430299","https://openalex.org/W1831690014","https://openalex.org/W1561022130","https://openalex.org/W1878822803","https://openalex.org/W1995263003","https://openalex.org/W2222580448"],"abstract_inverted_index":{"Audio-driven":[0],"talking":[1,29],"face":[2,30,32],"with":[3,91,119],"portrait":[4],"customization":[5],"enhances":[6],"the":[7,25,45,81,92,107,112,145,153],"flexibility":[8],"of":[9,83,94,115,147],"avatar":[10],"applications":[11],"for":[12,53],"different":[13],"scenarios,":[14],"such":[15],"as":[16,37],"on-line":[17],"meetings,":[18],"mixed":[19],"reality,":[20],"and":[21,31,51,66,106,143],"data":[22],"generation.":[23],"Among":[24],"existing":[26],"methods,":[27,142],"audio-driven":[28],"swapping":[33],"are":[34,41],"typically":[35],"viewed":[36],"separate":[38],"tasks":[39],"that":[40,79,125],"cascaded":[42],"to":[43,132],"achieve":[44],"objective.":[46],"Using":[47],"state-of-the-art":[48],"methods":[49],"Wav2Lip":[50],"SimSwap":[52],"this":[54],"purpose,":[55],"we":[56,74],"meet":[57],"some":[58],"issues:":[59],"affected":[60],"mouth":[61,90],"synchronization,":[62],"lost":[63],"texture":[64,138],"information,":[65],"slow":[67],"inference":[68,154],"speed.":[69,155],"To":[70],"resolve":[71],"these":[72],"issues,":[73],"propose":[75],"an":[76],"end-to-end":[77],"model":[78,113],"combines":[80],"advantages":[82],"both":[84],"approaches.":[85],"Our":[86],"approach":[87,151],"generates":[88],"highly-synchronized":[89],"aid":[93],"a":[95],"pre-trained":[96],"lip-sync":[97,129],"discriminator.":[98],"And":[99],"identity":[100],"information":[101],"is":[102],"provided":[103],"by":[104],"ArcFace":[105],"ID":[108],"injection":[109],"module":[110],"in":[111],"because":[114],"its":[116],"strong":[117],"correlation":[118],"facial":[120],"texture.":[121],"Experimental":[122],"results":[123],"demonstrate":[124],"our":[126,150],"method":[127],"achieves":[128],"accuracy":[130],"comparable":[131],"real":[133],"synced":[134],"videos,":[135],"preserves":[136],"more":[137],"details":[139],"than":[140],"cascade":[141],"alleviates":[144],"blurring":[146],"Wav2Lip.":[148],"Also,":[149],"improves":[152],"<sup":[156],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[157],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[158]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
