{"id":"https://openalex.org/W3205904637","doi":"https://doi.org/10.1145/3474085.3475318","title":"Towards Realistic Visual Dubbing with Heterogeneous Sources","display_name":"Towards Realistic Visual Dubbing with Heterogeneous Sources","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3205904637","doi":"https://doi.org/10.1145/3474085.3475318","mag":"3205904637"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475318","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475318","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2201.06260","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066989731","display_name":"Tianyi Xie","orcid":"https://orcid.org/0009-0000-5732-7379"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianyi Xie","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006657965","display_name":"Liucheng Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liucheng Liao","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085657064","display_name":"Cheng Bi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng Bi","raw_affiliation_strings":["ByteDance AI Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance AI Lab, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031898944","display_name":"Benlai Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benlai Tang","raw_affiliation_strings":["ByteDance AI Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance AI Lab, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069496916","display_name":"Xiang Yin","orcid":"https://orcid.org/0000-0002-6554-1516"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiang Yin","raw_affiliation_strings":["ByteDance AI Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance AI Lab, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005666034","display_name":"Jianfei Yang","orcid":"https://orcid.org/0000-0002-8075-0439"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jianfei Yang","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100634350","display_name":"Mingjie Wang","orcid":"https://orcid.org/0000-0002-7346-1110"},"institutions":[{"id":"https://openalex.org/I130438778","display_name":"Memorial University of Newfoundland","ror":"https://ror.org/04haebc03","country_code":"CA","type":"education","lineage":["https://openalex.org/I130438778"]},{"id":"https://openalex.org/I79817857","display_name":"University of Guelph","ror":"https://ror.org/01r7awg59","country_code":"CA","type":"education","lineage":["https://openalex.org/I79817857"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mingjie Wang","raw_affiliation_strings":["University of Guelph &amp; Memorial University of Newfoundland, Guelph, ON, Canada"],"affiliations":[{"raw_affiliation_string":"University of Guelph &amp; Memorial University of Newfoundland, Guelph, ON, Canada","institution_ids":["https://openalex.org/I79817857","https://openalex.org/I130438778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102857591","display_name":"Jiali Yao","orcid":"https://orcid.org/0009-0009-9673-3658"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiali Yao","raw_affiliation_strings":["ByteDance AI Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance AI Lab, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354733","display_name":"Yang Zhang","orcid":"https://orcid.org/0000-0002-8540-1254"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Zhang","raw_affiliation_strings":["ByteDance AI Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"ByteDance AI Lab, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110851569","display_name":"Zejun Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zejun Ma","raw_affiliation_strings":["ByteDance AI Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"ByteDance AI Lab, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5066989731"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":3.2318,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.92966915,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1739","last_page":"1747"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.849226713180542},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6046289801597595},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5002624988555908},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4924872815608978},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.45673441886901855},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45414280891418457},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4481050372123718},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3317405879497528}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.849226713180542},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6046289801597595},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5002624988555908},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4924872815608978},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.45673441886901855},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45414280891418457},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4481050372123718},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3317405879497528},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3474085.3475318","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475318","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2201.06260","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2201.06260","pdf_url":"https://arxiv.org/pdf/2201.06260","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2201.06260","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2201.06260","pdf_url":"https://arxiv.org/pdf/2201.06260","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6899999976158142,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1901129140","https://openalex.org/W2133665775","https://openalex.org/W2325939864","https://openalex.org/W2331128040","https://openalex.org/W2603777577","https://openalex.org/W2738406145","https://openalex.org/W2806833697","https://openalex.org/W2890952074","https://openalex.org/W2914217321","https://openalex.org/W2944294033","https://openalex.org/W2960274051","https://openalex.org/W2962770929","https://openalex.org/W2962785568","https://openalex.org/W2962793481","https://openalex.org/W2963073614","https://openalex.org/W2963789946","https://openalex.org/W2963800363","https://openalex.org/W2963981733","https://openalex.org/W2971634123","https://openalex.org/W2981767644","https://openalex.org/W2982763192","https://openalex.org/W2990452356","https://openalex.org/W2998605827","https://openalex.org/W3004048296","https://openalex.org/W3019952993","https://openalex.org/W3023706973","https://openalex.org/W3042500441","https://openalex.org/W3081492798","https://openalex.org/W3095664976","https://openalex.org/W3097792222","https://openalex.org/W3101631197","https://openalex.org/W3105763085","https://openalex.org/W3106532934","https://openalex.org/W3107666850","https://openalex.org/W3109114891","https://openalex.org/W3112809496","https://openalex.org/W3145297507","https://openalex.org/W3150845012","https://openalex.org/W4226348722","https://openalex.org/W4301206121"],"related_works":["https://openalex.org/W2381850946","https://openalex.org/W4380449851","https://openalex.org/W3125091513","https://openalex.org/W4318832338","https://openalex.org/W4248383205","https://openalex.org/W4234745530","https://openalex.org/W2146383839","https://openalex.org/W2231829109","https://openalex.org/W2916591301","https://openalex.org/W2789577489"],"abstract_inverted_index":{"The":[0],"task":[1,122],"of":[2,34,72,96,110,123,200],"few-shot":[3,77],"visual":[4,78],"dubbing":[5],"focuses":[6],"on":[7],"synchronizing":[8],"the":[9,40,55,115,120,139,155,175,180,191,198,211,214],"lip":[10,116],"movements":[11,117],"with":[12,92,210],"arbitrary":[13],"speech":[14,212],"input":[15],"for":[16,62,142,163],"any":[17],"talking":[18,125,166],"head":[19,126],"video.":[20],"Albeit":[21],"moderate":[22],"improvements":[23],"in":[24,59,80,174,204],"current":[25],"approaches,":[26],"they":[27],"commonly":[28],"require":[29],"high-quality":[30],"homologous":[31,57],"data":[32,45,58,73,149],"sources":[33],"videos":[35,208],"and":[36,74,113],"audios,":[37],"thus":[38],"causing":[39],"failure":[41],"to":[42,53,136,154,170,190],"leverage":[43],"heterogeneous":[44,98,148],"sufficiently.":[46],"In":[47],"practice,":[48],"it":[49,134],"may":[50],"be":[51],"intractable":[52],"collect":[54],"perfect":[56],"some":[60],"cases,":[61],"example,":[63],"audio-corrupted":[64],"or":[65],"picture-blurry":[66],"videos.":[67],"To":[68],"explore":[69],"this":[70,81,129],"kind":[71],"support":[75],"high-fidelity":[76],"dubbing,":[79],"paper,":[82],"we":[83],"novelly":[84],"propose":[85],"a":[86,93,160,164],"simple":[87],"yet":[88],"efficient":[89],"two-stage":[90,102,143],"framework":[91,158],"higher":[94],"flexibility":[95],"mining":[97],"data.":[99],"Specifically,":[100],"our":[101,131,157,201],"paradigm":[103],"employs":[104],"facial":[105],"landmarks":[106],"as":[107],"intermediate":[108],"prior":[109],"latent":[111],"representations":[112],"disentangles":[114],"prediction":[118],"from":[119,188],"core":[121],"realistic":[124,207],"generation.":[127],"By":[128],"means,":[130],"method":[132,182,203],"makes":[133],"possible":[135],"independently":[137],"utilize":[138],"training":[140],"corpus":[141],"sub-networks":[144],"using":[145],"more":[146],"available":[147],"easily":[150],"acquired.":[151],"Besides,":[152],"thanks":[153],"disentanglement,":[156],"allows":[159],"further":[161],"fine-tuning":[162],"given":[165],"head,":[167],"thereby":[168],"leading":[169],"better":[171],"speaker-identity":[172],"preserving":[173],"final":[176],"synthesized":[177],"results.":[178],"Moreover,":[179],"proposed":[181,202],"can":[183],"also":[184],"transfer":[185],"appearance":[186],"features":[187],"others":[189],"target":[192],"speaker.":[193],"Extensive":[194],"experimental":[195],"results":[196],"demonstrate":[197],"superiority":[199],"generating":[205],"highly":[206],"synchronized":[209],"over":[213],"state-of-the-art.":[215]},"counts_by_year":[{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":5}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
