{"id":"https://openalex.org/W4415444055","doi":"https://doi.org/10.1109/thms.2025.3618409","title":"AV-Lip-Sync+: Leveraging AV-HuBERT to Exploit Multimodal Inconsistency for Deepfake Detection of Frontal Face Videos","display_name":"AV-Lip-Sync+: Leveraging AV-HuBERT to Exploit Multimodal Inconsistency for Deepfake Detection of Frontal Face Videos","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W4415444055","doi":"https://doi.org/10.1109/thms.2025.3618409"},"language":null,"primary_location":{"id":"doi:10.1109/thms.2025.3618409","is_oa":false,"landing_page_url":"https://doi.org/10.1109/thms.2025.3618409","pdf_url":null,"source":{"id":"https://openalex.org/S2476799526","display_name":"IEEE Transactions on Human-Machine Systems","issn_l":"2168-2291","issn":["2168-2291","2168-2305"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Human-Machine Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018523027","display_name":"Sahibzada Adil Shahzad","orcid":"https://orcid.org/0009-0000-5591-8423"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Sahibzada Adil Shahzad","raw_affiliation_strings":["Social Networks and Human-Centered Computing Program, Taiwan International Graduate Program, Academia Sinica, Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0009-0000-5591-8423","affiliations":[{"raw_affiliation_string":"Social Networks and Human-Centered Computing Program, Taiwan International Graduate Program, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016272627","display_name":"Ammarah Hashmi","orcid":"https://orcid.org/0000-0002-1973-6902"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ammarah Hashmi","raw_affiliation_strings":["Social Networks and Human-Centered Computing Program, Taiwan International Graduate Program, Academia Sinica, Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-1973-6902","affiliations":[{"raw_affiliation_string":"Social Networks and Human-Centered Computing Program, Taiwan International Graduate Program, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016386447","display_name":"Yan\u2010Tsung Peng","orcid":"https://orcid.org/0000-0002-3802-1670"},"institutions":[{"id":"https://openalex.org/I87354575","display_name":"National Chengchi University","ror":"https://ror.org/03rqk8h36","country_code":"TW","type":"education","lineage":["https://openalex.org/I87354575"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yan-Tsung Peng","raw_affiliation_strings":["Department of Computer Science, National Chengchi University, Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-3802-1670","affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Chengchi University, Taipei, Taiwan","institution_ids":["https://openalex.org/I87354575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044008055","display_name":"Yu Tsao","orcid":"https://orcid.org/0000-0001-6956-0418"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu Tsao","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0000-0001-6956-0418","affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071214181","display_name":"Hsin\u2010Min Wang","orcid":"https://orcid.org/0000-0003-3599-5071"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsin-Min Wang","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-3599-5071","affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5018523027"],"corresponding_institution_ids":["https://openalex.org/I4210098366"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83406321,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"55","issue":"6","first_page":"973","last_page":"982"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9517999887466431,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9517999887466431,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.9290000200271606},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6568999886512756},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6534000039100647},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5436000227928162},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5083000063896179},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.49639999866485596},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.42179998755455017},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4165000021457672}],"concepts":[{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.9290000200271606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8690000176429749},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.705299973487854},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6568999886512756},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6534000039100647},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5436000227928162},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5083000063896179},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.49639999866485596},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.42179998755455017},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4165000021457672},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4036000072956085},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3995000123977661},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3961000144481659},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3878999948501587},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.38510000705718994},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.3790000081062317},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.3271999955177307},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.27709999680519104},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2565000057220459}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/thms.2025.3618409","is_oa":false,"landing_page_url":"https://doi.org/10.1109/thms.2025.3618409","pdf_url":null,"source":{"id":"https://openalex.org/S2476799526","display_name":"IEEE Transactions on Human-Machine Systems","issn_l":"2168-2291","issn":["2168-2291","2168-2305"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Human-Machine Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W1482998036","https://openalex.org/W2301937176","https://openalex.org/W2303197844","https://openalex.org/W2406845569","https://openalex.org/W2407170210","https://openalex.org/W2531409750","https://openalex.org/W2558151185","https://openalex.org/W2726515241","https://openalex.org/W2752337926","https://openalex.org/W2772792254","https://openalex.org/W2891145043","https://openalex.org/W2913399670","https://openalex.org/W2914447220","https://openalex.org/W2942074357","https://openalex.org/W2962958939","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2963684180","https://openalex.org/W2963720850","https://openalex.org/W2982058372","https://openalex.org/W2984700035","https://openalex.org/W2999634272","https://openalex.org/W3007419529","https://openalex.org/W3016011581","https://openalex.org/W3034713808","https://openalex.org/W3034900344","https://openalex.org/W3034909238","https://openalex.org/W3045675435","https://openalex.org/W3081492798","https://openalex.org/W3083246145","https://openalex.org/W3092709185","https://openalex.org/W3093010840","https://openalex.org/W3093077034","https://openalex.org/W3115311952","https://openalex.org/W3121016465","https://openalex.org/W3175342695","https://openalex.org/W3176990204","https://openalex.org/W3197123936","https://openalex.org/W3208526032","https://openalex.org/W4200240631","https://openalex.org/W4214612132","https://openalex.org/W4214661097","https://openalex.org/W4214691743","https://openalex.org/W4283817039","https://openalex.org/W4292787336","https://openalex.org/W4295046123","https://openalex.org/W4297841641","https://openalex.org/W4312095900","https://openalex.org/W4312095999","https://openalex.org/W4320882980","https://openalex.org/W4360993864","https://openalex.org/W4385478414","https://openalex.org/W4385801058","https://openalex.org/W4385805162","https://openalex.org/W4385999783","https://openalex.org/W4386076652","https://openalex.org/W4386267173","https://openalex.org/W4386524605","https://openalex.org/W4392942875","https://openalex.org/W4402112503","https://openalex.org/W4402774452","https://openalex.org/W4402917263","https://openalex.org/W4403780752","https://openalex.org/W4404239158","https://openalex.org/W4404877539","https://openalex.org/W4408780318"],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"manipulations":[1],"(also":[2],"known":[3],"as":[4,110],"audio-visual":[5],"deepfakes)":[6],"make":[7],"it":[8],"difficult":[9],"for":[10,70,95],"unimodal":[11,63],"deepfake":[12,165],"detectors":[13],"to":[14,36,87,124,152],"detect":[15],"forgeries":[16],"in":[17],"multimedia":[18],"content.":[19],"To":[20],"avoid":[21],"the":[22,102,126,130,142,164,184],"spread":[23],"of":[24,55],"false":[25],"propaganda":[26],"and":[27,66,92,113,117,132,156,159,178,186],"fake":[28],"news,":[29],"timely":[30],"detection":[31],"is":[32],"crucial.":[33],"The":[34],"damage":[35],"either":[37],"modality":[38],"(i.e.,":[39],"visual":[40,93,112,133,139],"or":[41],"audio)":[42],"can":[43,51],"only":[44,137],"be":[45],"discovered":[46],"through":[47],"multimodal":[48,82,96],"models":[49,177],"that":[50,171],"exploit":[52,88,153],"both":[53],"pieces":[54],"information":[56],"simultaneously.":[57],"However,":[58],"previous":[59],"methods":[60],"mainly":[61],"adopt":[62,147],"video":[64,97,150],"forensics":[65],"use":[67,101],"supervised":[68],"pretraining":[69],"forgery":[71,98],"detection.":[72,99],"This":[73],"study":[74],"proposes":[75],"a":[76,81,111,118],"new":[77,180],"method":[78],"based":[79],"on":[80,183],"self-supervised-learning":[83],"(SSL)":[84],"feature":[85,115],"extractor":[86,116],"inconsistency":[89],"between":[90,129],"audio":[91,131],"modalities":[94],"We":[100],"transformer-based":[103,149],"SSL":[104],"pretrained":[105],"Audio-Visual":[106],"HuBERT":[107],"(AV-HuBERT)":[108],"model":[109,151,173],"acoustic":[114],"multiscale":[119],"temporal":[120,127,160],"convolutional":[121],"neural":[122],"network":[123],"capture":[125,157],"correlation":[128],"modalities.":[134],"Since":[135],"AV-HuBERT":[136],"extracts":[138],"features":[140,155],"from":[141],"lip":[143],"region,":[144],"we":[145],"also":[146],"another":[148],"facial":[154],"spatial":[158],"artifacts":[161],"caused":[162],"during":[163],"generation":[166],"process.":[167],"Experimental":[168],"results":[169],"show":[170],"our":[172],"outperforms":[174],"all":[175],"existing":[176],"achieves":[179],"state-of-the-art":[181],"performance":[182],"FakeAVCeleb":[185],"DeepfakeTIMIT":[187],"datasets.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2025-12-03T23:09:05.601824","created_date":"2025-10-24T00:00:00"}
