{"id":"https://openalex.org/W4408311646","doi":"https://doi.org/10.1007/s11760-025-03970-7","title":"Enhancing multimodal deepfake detection with local\u2013global feature integration and diffusion models","display_name":"Enhancing multimodal deepfake detection with local\u2013global feature integration and diffusion models","publication_year":2025,"publication_date":"2025-03-11","ids":{"openalex":"https://openalex.org/W4408311646","doi":"https://doi.org/10.1007/s11760-025-03970-7"},"language":"en","primary_location":{"id":"doi:10.1007/s11760-025-03970-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11760-025-03970-7","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11760-025-03970-7.pdf","source":{"id":"https://openalex.org/S156904493","display_name":"Signal Image and Video Processing","issn_l":"1863-1703","issn":["1863-1703","1863-1711"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Signal, Image and Video Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11760-025-03970-7.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052105927","display_name":"Muhammad Yaqoob Javed","orcid":"https://orcid.org/0000-0002-6449-1035"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Muhammad Javed","raw_affiliation_strings":["Department of Computer Science and Technology, College of Computer Science, Donghua University, Shanghai, 200022, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, College of Computer Science, Donghua University, Shanghai, 200022, China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100764320","display_name":"Zhaohui Zhang","orcid":"https://orcid.org/0000-0002-3171-7667"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhaohui Zhang","raw_affiliation_strings":["Department of Computer Science and Technology, College of Computer Science, Donghua University, Shanghai, 200022, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, College of Computer Science, Donghua University, Shanghai, 200022, China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066487939","display_name":"Fida Hussain Dahri","orcid":"https://orcid.org/0000-0003-1085-8759"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fida Hussain Dahri","raw_affiliation_strings":["School of Computer Science and Engineering, Southeast University, Nanjing, 211189, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, Nanjing, 211189, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101867351","display_name":"Teerath Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Teerath Kumar","raw_affiliation_strings":["School of Computing, Dublin City University, Dublin, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100764320","https://openalex.org/A5101867351"],"corresponding_institution_ids":["https://openalex.org/I181326427","https://openalex.org/I42934936"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":30.3796,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.99791763,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"19","issue":"5","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.7629714012145996},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5782023668289185},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5564742088317871},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41974273324012756},{"id":"https://openalex.org/keywords/biological-system","display_name":"Biological system","score":0.3952367603778839},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3769509792327881},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.17349597811698914},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10042795538902283}],"concepts":[{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.7629714012145996},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5782023668289185},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5564742088317871},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41974273324012756},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.3952367603778839},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3769509792327881},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.17349597811698914},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10042795538902283},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11760-025-03970-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11760-025-03970-7","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11760-025-03970-7.pdf","source":{"id":"https://openalex.org/S156904493","display_name":"Signal Image and Video Processing","issn_l":"1863-1703","issn":["1863-1703","1863-1711"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Signal, Image and Video Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11760-025-03970-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11760-025-03970-7","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11760-025-03970-7.pdf","source":{"id":"https://openalex.org/S156904493","display_name":"Signal Image and Video Processing","issn_l":"1863-1703","issn":["1863-1703","1863-1711"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Signal, Image and Video Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320865","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4408311646.pdf"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W2059322891","https://openalex.org/W2097715699","https://openalex.org/W2963684180","https://openalex.org/W2982058372","https://openalex.org/W3017202367","https://openalex.org/W3017837134","https://openalex.org/W3033601652","https://openalex.org/W3080632971","https://openalex.org/W3117815017","https://openalex.org/W3135785775","https://openalex.org/W3175342695","https://openalex.org/W3197123936","https://openalex.org/W3203548104","https://openalex.org/W3205798727","https://openalex.org/W4293070540","https://openalex.org/W4308233774","https://openalex.org/W4312095999","https://openalex.org/W4319978495","https://openalex.org/W4320882980","https://openalex.org/W4380537106","https://openalex.org/W4385413584","https://openalex.org/W4385805162","https://openalex.org/W4386272941","https://openalex.org/W4386928847","https://openalex.org/W4388851105","https://openalex.org/W4389898488","https://openalex.org/W4390839437","https://openalex.org/W4390873137","https://openalex.org/W4390874447","https://openalex.org/W4392942875","https://openalex.org/W4393150006","https://openalex.org/W4394984992","https://openalex.org/W4403791323","https://openalex.org/W6849300840"],"related_works":["https://openalex.org/W3147584709","https://openalex.org/W2977677679","https://openalex.org/W1992327129","https://openalex.org/W2381986121","https://openalex.org/W2370918718","https://openalex.org/W2256933480","https://openalex.org/W2027854990","https://openalex.org/W2370081953","https://openalex.org/W2033914206","https://openalex.org/W2042327336"],"abstract_inverted_index":{"Abstract":[0],"Deepfake":[1],"detection":[2],"has":[3],"become":[4],"a":[5],"critical":[6],"challenge":[7],"nowadays":[8],"with":[9,39,55,82,132],"the":[10,142,159],"rise":[11],"of":[12,135],"sophisticated":[13],"generative":[14],"techniques":[15],"that":[16,53],"manipulate":[17],"audio-visual":[18,171],"data.":[19,172],"Existing":[20],"methods":[21],"primarily":[22],"focus":[23],"on":[24,35,141],"lip":[25],"movement":[26,77],"synchronization":[27],"using":[28],"audio":[29,83,105],"and":[30,57,78,104,118,139,146,163],"visual":[31,72,103],"features,":[32,93],"often":[33],"relying":[34],"local":[36,56,92],"feature":[37,124],"extraction":[38],"Convolutional":[40],"Neural":[41],"Networks":[42],"(CNNs).":[43],"In":[44],"this":[45],"work,":[46],"we":[47],"propose":[48],"an":[49],"enhanced":[50],"multimodal":[51],"framework":[52,128],"integrates":[54],"global":[58,98],"features":[59,73,84],"for":[60],"advanced":[61],"deepfake":[62],"detection.":[63],"Our":[64],"approach":[65],"extends":[66],"traditional":[67],"pipelines":[68],"by":[69],"introducing":[70],"additional":[71],"such":[74],"as":[75,112],"eye":[76],"facial":[79],"regions,":[80],"combined":[81],"to":[85,114],"model":[86],"cross-modal":[87],"dependencies.":[88],"While":[89],"CNNs":[90],"capture":[91],"Vision":[94],"Transformers":[95],"(ViTs)":[96],"extract":[97],"contextual":[99],"relationships":[100],"from":[101],"both":[102],"modalities.":[106],"The":[107,126],"diffusion":[108],"models":[109],"are":[110],"incorporated":[111],"pre-processors":[113],"refine":[115],"noisy":[116],"data":[117],"generate":[119],"realistic":[120],"augmentations,":[121],"ensuring":[122],"high-quality":[123],"representation.":[125],"proposed":[127],"achieves":[129],"state-of-the-art":[130],"performance,":[131],"accuracy":[133],"scores":[134],"0.9987,":[136],"0.9825,":[137],"0.9915,":[138],"0.9812":[140],"FakeAVCeleb,":[143],"AV-Deepfake1M,":[144],"TVIL,":[145],"LAV-DF":[147],"datasets,":[148],"respectively.":[149],"These":[150],"results":[151],"demonstrate":[152],"significant":[153],"improvements":[154],"over":[155],"existing":[156],"methods,":[157],"highlighting":[158],"framework\u2019s":[160],"superior":[161],"generalization":[162],"robustness":[164],"in":[165],"detecting":[166],"subtle":[167],"inconsistencies":[168],"across":[169],"manipulated":[170]},"counts_by_year":[{"year":2026,"cited_by_count":20},{"year":2025,"cited_by_count":13}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
