{"id":"https://openalex.org/W4403791608","doi":"https://doi.org/10.1145/3664647.3681386","title":"SyncTalklip: Highly Synchronized Lip-Readable Speaker Generation with Multi-Task Learning","display_name":"SyncTalklip: Highly Synchronized Lip-Readable Speaker Generation with Multi-Task Learning","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791608","doi":"https://doi.org/10.1145/3664647.3681386"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681386","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681386","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681386?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681386?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040461965","display_name":"Xiaoda Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaoda Yang","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009897266","display_name":"Xize Cheng","orcid":"https://orcid.org/0000-0001-9708-3225"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xize Cheng","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101485171","display_name":"Dongjie Fu","orcid":"https://orcid.org/0009-0000-7682-7678"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongjie Fu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113747251","display_name":"Minghui Fang","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minghui Fang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101307840","display_name":"Jialung Zuo","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jialung Zuo","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102904447","display_name":"Shengpeng Ji","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengpeng Ji","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079260216","display_name":"Zhou Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Zhao","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185","https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102011966","display_name":"Tao Jin","orcid":"https://orcid.org/0000-0003-3564-1628"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Tao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5040461965"],"corresponding_institution_ids":["https://openalex.org/I4210123185","https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":1.4514,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82987157,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"8149","last_page":"8158"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8588805198669434},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7065888047218323},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5087183117866516},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3953922390937805},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.05421477556228638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8588805198669434},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7065888047218323},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5087183117866516},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3953922390937805},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.05421477556228638},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681386","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681386","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681386?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3664647.3681386","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681386","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681386?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4403791608.pdf"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W1530404542","https://openalex.org/W2016589492","https://openalex.org/W2133665775","https://openalex.org/W2562637781","https://openalex.org/W2604379605","https://openalex.org/W2738406145","https://openalex.org/W2765811365","https://openalex.org/W2890952074","https://openalex.org/W2914217321","https://openalex.org/W2944294033","https://openalex.org/W2963290645","https://openalex.org/W2996833883","https://openalex.org/W3010434693","https://openalex.org/W3019952993","https://openalex.org/W3081492798","https://openalex.org/W3101631197","https://openalex.org/W3109114891","https://openalex.org/W3162293946","https://openalex.org/W3186090335","https://openalex.org/W3201519611","https://openalex.org/W3209059054","https://openalex.org/W4200630629","https://openalex.org/W4249075551","https://openalex.org/W4286482506","https://openalex.org/W4378513185","https://openalex.org/W4386065999","https://openalex.org/W4387968213","https://openalex.org/W4390873467","https://openalex.org/W4392567168","https://openalex.org/W4402670354","https://openalex.org/W4403791285","https://openalex.org/W4403791442","https://openalex.org/W4403792076"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Talking":[0],"Face":[1],"Generation":[2],"(TFG)":[3],"reconstructs":[4],"facial":[5],"motions":[6],"concerning":[7],"lips":[8],"given":[9],"speech":[10],"input,":[11],"which":[12],"aims":[13],"to":[14,84,113,134],"generate":[15],"highquality,":[16],"synchronized,":[17],"and":[18,29,31,58,103,119,131,148,162],"lip-readable":[19],"videos.":[20],"Previous":[21],"efforts":[22],"have":[23,153],"achieved":[24],"success":[25],"in":[26,51,62,68,100,145],"generating":[27],"quality":[28],"synchronization,":[30,57],"recently,":[32],"there":[33,47],"has":[34],"been":[35],"an":[36],"increasing":[37],"focus":[38],"on":[39,117],"the":[40,86,96],"importance":[41],"of":[42,70,74,88,98],"intelligibility.":[43,120],"Despite":[44],"these":[45],"efforts,":[46],"remains":[48],"a":[49,53,79,108,115,124],"challenge":[50],"achieving":[52],"balance":[54],"among":[55],"quality,":[56,146],"intelligibility,":[59,104,147],"often":[60],"resulting":[61],"trade-offs":[63],"that":[64,140],"compromise":[65],"one":[66],"aspect":[67],"favor":[69],"another.":[71],"In":[72],"light":[73],"this,":[75],"we":[76,105,122],"propose":[77,123],"SyncTalklip,":[78],"novel":[80,125],"dual-tower":[81],"framework":[82],"designed":[83],"overcome":[85],"challenges":[87],"synchronization":[89,102,118],"while":[90],"improving":[91],"lip-reading":[92],"performance.":[93],"To":[94],"enhance":[95,135],"performance":[97,144],"SyncTalklip":[99,141],"both":[101],"design":[106],"AV-SyncNet,":[107],"pre-trained":[109],"multi-task":[110],"model,":[111],"aiming":[112],"achieve":[114],"dual-focus":[116],"Moreover,":[121],"cross-modal":[126],"contrastive":[127],"learning":[128],"bringing":[129],"audio":[130],"video":[132],"closer":[133],"synchronization.":[136,149],"Experimental":[137],"results":[138],"demonstrate":[139],"achieves":[142],"state-of-the-art":[143],"Furthermore,":[150],"extensive":[151],"experiments":[152],"demonstrated":[154],"our":[155],"model's":[156],"generalizability":[157],"across":[158],"domains.":[159],"The":[160],"code":[161],"demo":[163],"is":[164],"available":[165],"at":[166],"https://sync-talklip.github.io.":[167]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
