{"id":"https://openalex.org/W4311137635","doi":"https://doi.org/10.1145/3550469.3555393","title":"Masked Lip-Sync Prediction by Audio-Visual Contextual Exploitation in Transformers","display_name":"Masked Lip-Sync Prediction by Audio-Visual Contextual Exploitation in Transformers","publication_year":2022,"publication_date":"2022-11-29","ids":{"openalex":"https://openalex.org/W4311137635","doi":"https://doi.org/10.1145/3550469.3555393"},"language":"en","primary_location":{"id":"doi:10.1145/3550469.3555393","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3550469.3555393","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIGGRAPH Asia 2022 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082635465","display_name":"Yasheng Sun","orcid":"https://orcid.org/0000-0002-0589-4424"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yasheng Sun","raw_affiliation_strings":["Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100707855","display_name":"Hang Zhou","orcid":"https://orcid.org/0000-0002-2616-923X"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Hang Zhou","raw_affiliation_strings":["Baidu Inc., China and The Chinese University of Hong Kong (CUHK), China"],"affiliations":[{"raw_affiliation_string":"Baidu Inc., China and The Chinese University of Hong Kong (CUHK), China","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002386773","display_name":"Kaisiyuan Wang","orcid":"https://orcid.org/0000-0002-2120-8383"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Kaisiyuan Wang","raw_affiliation_strings":["University of Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of Sydney, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078823053","display_name":"Qianyi Wu","orcid":"https://orcid.org/0000-0001-8764-6178"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Qianyi Wu","raw_affiliation_strings":["Monash University, Australia"],"affiliations":[{"raw_affiliation_string":"Monash University, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102727690","display_name":"Zhibin Hong","orcid":"https://orcid.org/0000-0003-0207-4751"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhibin Hong","raw_affiliation_strings":["Baidu, China"],"affiliations":[{"raw_affiliation_string":"Baidu, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076936566","display_name":"Jingtuo Liu","orcid":"https://orcid.org/0000-0003-0566-0780"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingtuo Liu","raw_affiliation_strings":["Baidu Inc., China"],"affiliations":[{"raw_affiliation_string":"Baidu Inc., China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050031109","display_name":"Errui Ding","orcid":"https://orcid.org/0000-0002-1867-5378"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Errui Ding","raw_affiliation_strings":["Baidu Inc., China"],"affiliations":[{"raw_affiliation_string":"Baidu Inc., China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075880303","display_name":"Jingdong Wang","orcid":"https://orcid.org/0000-0002-4888-4445"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingdong Wang","raw_affiliation_strings":["Baidu Inc., China"],"affiliations":[{"raw_affiliation_string":"Baidu Inc., China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406050","display_name":"Ziwei Liu","orcid":"https://orcid.org/0000-0002-4220-5958"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ziwei Liu","raw_affiliation_strings":["Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056982483","display_name":"Hideki Koike","orcid":"https://orcid.org/0000-0002-8989-6434"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Koike Hideki","raw_affiliation_strings":["Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":5,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5082635465"],"corresponding_institution_ids":["https://openalex.org/I114531698"],"apc_list":null,"apc_paid":null,"fwci":3.4261,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.93312208,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8099313378334045},{"id":"https://openalex.org/keywords/sync","display_name":"sync","score":0.7115370035171509},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6076720356941223},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.565650224685669},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5026092529296875},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49823594093322754},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4661373496055603},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.42372581362724304},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.15849089622497559},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.09913179278373718}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8099313378334045},{"id":"https://openalex.org/C3913047","wikidata":"https://www.wikidata.org/wiki/Q1956265","display_name":"sync","level":3,"score":0.7115370035171509},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6076720356941223},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.565650224685669},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5026092529296875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49823594093322754},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4661373496055603},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.42372581362724304},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.15849089622497559},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.09913179278373718},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3550469.3555393","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3550469.3555393","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIGGRAPH Asia 2022 Conference Papers","raw_type":"proceedings-article"},{"id":"pmh:oai:irdb.nii.ac.jp:00897:0005861093","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100893680","pdf_url":null,"source":{"id":"https://openalex.org/S7407056385","display_name":"Institutional Repositories DataBase (IRDB)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184597095","host_organization_name":"National Institute of Informatics","host_organization_lineage":["https://openalex.org/I184597095"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proc. on SIGGRAPH Asia 2022","raw_type":"conference paper"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50701051","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100917730","pdf_url":null,"source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2133665775","https://openalex.org/W2237250383","https://openalex.org/W2604379605","https://openalex.org/W2726515241","https://openalex.org/W2738406145","https://openalex.org/W2752796333","https://openalex.org/W2808631503","https://openalex.org/W2914217321","https://openalex.org/W2944294033","https://openalex.org/W2963081548","https://openalex.org/W2963091558","https://openalex.org/W2963290645","https://openalex.org/W2963800363","https://openalex.org/W2964559396","https://openalex.org/W2979894294","https://openalex.org/W3019952993","https://openalex.org/W3035022492","https://openalex.org/W3081492798","https://openalex.org/W3094502228","https://openalex.org/W3097792222","https://openalex.org/W3101631197","https://openalex.org/W3109114891","https://openalex.org/W3138516171","https://openalex.org/W3174763799","https://openalex.org/W3175544090","https://openalex.org/W3175779516","https://openalex.org/W3176721746","https://openalex.org/W3180355996","https://openalex.org/W3186090335","https://openalex.org/W3187364420","https://openalex.org/W3206082266","https://openalex.org/W3207849023","https://openalex.org/W3208601549","https://openalex.org/W3209984917","https://openalex.org/W4200631136","https://openalex.org/W4283818626","https://openalex.org/W4312358791","https://openalex.org/W4313021454","https://openalex.org/W6797790494"],"related_works":["https://openalex.org/W2063547430","https://openalex.org/W2524993630","https://openalex.org/W2292945204","https://openalex.org/W887159372","https://openalex.org/W2349867592","https://openalex.org/W2075546475","https://openalex.org/W4390137271","https://openalex.org/W1524304656","https://openalex.org/W2060119993","https://openalex.org/W2591588700"],"abstract_inverted_index":{"Previous":[0],"studies":[1],"have":[2],"explored":[3],"generating":[4],"accurately":[5],"lip-synced":[6,181],"talking":[7],"faces":[8],"for":[9,127,183],"arbitrary":[10,184],"targets":[11],"given":[12],"audio":[13,66,105,150,164],"conditions.":[14],"However,":[15],"most":[16],"of":[17,38,44,54,65],"them":[18],"deform":[19],"or":[20],"generate":[21,179],"the":[22,36,41,45,55,63,75,90,129,136,140,144,148,156],"whole":[23],"facial":[24],"area,":[25],"leading":[26],"to":[27,98,135],"non-realistic":[28],"results.":[29],"In":[30],"this":[31,71],"work,":[32],"we":[33,73,115],"delve":[34],"into":[35],"formulation":[37],"altering":[39],"only":[40],"mouth":[42,92],"shapes":[43],"target":[46],"person.":[47],"This":[48],"requires":[49],"masking":[50],"a":[51,117,160],"large":[52],"percentage":[53],"original":[56],"image":[57,168],"and":[58,67,106,121,143,169],"seamlessly":[59],"inpainting":[60],"it":[61],"with":[62,85,110,163],"aid":[64],"reference":[68,145],"frames.":[69],"To":[70],"end,":[72],"propose":[74,116],"Audio-Visual":[76],"Context-Aware":[77],"Transformer":[78],"(AV-CAT)":[79],"framework,":[80],"which":[81],"produces":[82],"accurate":[83],"lip-sync":[84,170],"photo-realistic":[86],"quality":[87],"by":[88],"predicting":[89],"masked":[91,130],"shapes.":[93],"Our":[94],"key":[95],"insight":[96],"is":[97,152],"exploit":[99],"desired":[100],"contextual":[101],"information":[102,138,151],"provided":[103],"in":[104,154],"visual":[107],"modalities":[108],"thoroughly":[109],"delicately":[111],"designed":[112],"Transformers.":[113],"Specifically,":[114],"convolution-Transformer":[118],"hybrid":[119],"backbone":[120],"design":[122],"an":[123],"attention-based":[124],"fusion":[125],"strategy":[126],"filling":[128],"parts.":[131],"It":[132],"uniformly":[133],"attends":[134],"textural":[137],"on":[139],"unmasked":[141],"regions":[142],"frame.":[146],"Then":[147],"semantic":[149],"involved":[153],"enhancing":[155],"self-attention":[157],"computation.":[158],"Additionally,":[159],"refinement":[161],"network":[162],"injection":[165],"improves":[166],"both":[167],"quality.":[171],"Extensive":[172],"experiments":[173],"validate":[174],"that":[175],"our":[176],"model":[177],"can":[178],"high-fidelity":[180],"results":[182],"subjects.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
