{"id":"https://openalex.org/W4415707850","doi":"https://doi.org/10.1109/icme59968.2025.11209973","title":"ET-Talk: Effective Training Strategy to Enhance Synchrony and Fidelity for Talking Face Generation","display_name":"ET-Talk: Effective Training Strategy to Enhance Synchrony and Fidelity for Talking Face Generation","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415707850","doi":"https://doi.org/10.1109/icme59968.2025.11209973"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209973","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109787503","display_name":"Baiqin Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Baiqin Wang","raw_affiliation_strings":["Chinese Academy of Sciences,MAIS, Institute of Automation"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,MAIS, Institute of Automation","institution_ids":["https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100632827","display_name":"Xiangyu Zhu","orcid":"https://orcid.org/0000-0003-2756-401X"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Zhu","raw_affiliation_strings":["Chinese Academy of Sciences,MAIS, Institute of Automation"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,MAIS, Institute of Automation","institution_ids":["https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101372193","display_name":"Fan Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210108815","display_name":"Psychogenics (United States)","ror":"https://ror.org/021k6d838","country_code":"US","type":"company","lineage":["https://openalex.org/I4210108815"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fan Shen","raw_affiliation_strings":["Psyche AI.INC"],"affiliations":[{"raw_affiliation_string":"Psyche AI.INC","institution_ids":["https://openalex.org/I4210108815"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108822331","display_name":"Hao Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210108815","display_name":"Psychogenics (United States)","ror":"https://ror.org/021k6d838","country_code":"US","type":"company","lineage":["https://openalex.org/I4210108815"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Xu","raw_affiliation_strings":["Psyche AI.INC"],"affiliations":[{"raw_affiliation_string":"Psyche AI.INC","institution_ids":["https://openalex.org/I4210108815"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041686328","display_name":"Shukai Chen","orcid":"https://orcid.org/0000-0001-5089-9657"},"institutions":[{"id":"https://openalex.org/I1301182553","display_name":"CNH Industrial (Czechia)","ror":"https://ror.org/01ry8n211","country_code":"CZ","type":"company","lineage":["https://openalex.org/I1301182553","https://openalex.org/I4210102001"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Shukai Chen","raw_affiliation_strings":["ZKTeco"],"affiliations":[{"raw_affiliation_string":"ZKTeco","institution_ids":["https://openalex.org/I1301182553"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101629247","display_name":"Zhen Lei","orcid":"https://orcid.org/0000-0002-0450-0913"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen Lei","raw_affiliation_strings":["Chinese Academy of Sciences,MAIS, Institute of Automation"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,MAIS, Institute of Automation","institution_ids":["https://openalex.org/I4210112150"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5109787503"],"corresponding_institution_ids":["https://openalex.org/I4210112150"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33646949,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.41339999437332153,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.41339999437332153,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.38760000467300415,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.09759999811649323,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.6090999841690063},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.5396999716758728},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.444599986076355},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.4401000142097473},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.42980000376701355},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4180999994277954},{"id":"https://openalex.org/keywords/copying","display_name":"Copying","score":0.4108999967575073}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6717000007629395},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.656000018119812},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.6090999841690063},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6003999710083008},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.5396999716758728},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.444599986076355},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.4401000142097473},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.42980000376701355},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C2779151265","wikidata":"https://www.wikidata.org/wiki/Q1156791","display_name":"Copying","level":2,"score":0.4108999967575073},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3882000148296356},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.3695000112056732},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3596999943256378},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.3319000005722046},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.3310999870300293},{"id":"https://openalex.org/C106430172","wikidata":"https://www.wikidata.org/wiki/Q6002272","display_name":"Image restoration","level":4,"score":0.2922999858856201},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2849999964237213}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209973","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1834627138","https://openalex.org/W2604379605","https://openalex.org/W2962770929","https://openalex.org/W2962785568","https://openalex.org/W2963307811","https://openalex.org/W3035574324","https://openalex.org/W3081492798","https://openalex.org/W3109585842","https://openalex.org/W3167297682","https://openalex.org/W3186090335","https://openalex.org/W3197199219","https://openalex.org/W3211147706","https://openalex.org/W4310379947","https://openalex.org/W4385318467","https://openalex.org/W4386071653","https://openalex.org/W4386072021","https://openalex.org/W4386075487","https://openalex.org/W4390872116","https://openalex.org/W4402726971","https://openalex.org/W4404199654"],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"significant":[1],"advancements":[2],"have":[3],"been":[4],"made":[5],"in":[6,17,124,171],"audio-driven":[7],"talking":[8],"face":[9,56],"generation.":[10],"While":[11],"GAN-based":[12],"methods":[13],"are":[14],"widely":[15],"used":[16],"this":[18],"task,":[19],"they":[20],"struggle":[21],"to":[22,33,48,100,153],"achieve":[23],"simultaneous":[24],"lip":[25,30,39,83,116,173],"accuracy":[26,174],"and":[27,50,64,76,85,113,149,156,175],"high-fidelity.":[28],"Generated":[29],"shapes":[31],"tend":[32],"be":[34],"overly":[35],"influenced":[36],"by":[37],"the":[38,54,92,102,107,111,130,134,161],"of":[40,115],"reference":[41,93,108],"images":[42,123],"that":[43,80,165],"provide":[44],"identity":[45,155],"information,":[46],"leading":[47],"unstable":[49],"unsynchronized":[51],"results.":[52],"Moreover,":[53],"synthesized":[55],"frequently":[57],"suffers":[58],"from":[59,104],"blurred":[60],"teeth,":[61],"skin":[62],"textures,":[63],"compromised":[65],"facial":[66,86,122],"identity.":[67],"To":[68],"address":[69],"these":[70],"challenges,":[71],"we":[72,89,119],"propose":[73],"an":[74],"effective":[75],"innovative":[77],"training":[78,125,141],"strategy":[79,99,142],"simultaneously":[81],"ensures":[82],"synchrony":[84,148],"fidelity.":[87,137],"First,":[88],"adaptively":[90],"select":[91],"image":[94,151,176],"using":[95],"a":[96,126,139],"hard-mining":[97],"based":[98],"prevent":[101],"network":[103],"simply":[105],"copying":[106],"lip,":[109],"enhancing":[110],"stability":[112],"synchronicity":[114],"movements.":[117],"Second,":[118],"incorporate":[120],"high-resolution":[121],"quality":[127,152],"discriminator":[128],"within":[129],"GAN":[131],"loss,":[132],"improving":[133],"generated":[135],"faces\u2019":[136],"Third,":[138],"global-to-detail":[140],"is":[143],"employed,":[144],"starting":[145],"with":[146],"strengthening":[147],"then":[150],"preserve":[154],"visual":[157],"details.":[158],"Experiments":[159],"on":[160],"HDTF":[162],"dataset":[163],"demonstrate":[164],"our":[166],"method":[167],"achieves":[168],"state-of-the-art":[169],"performance":[170],"both":[172],"quality.":[177]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-30T00:00:00"}
