{"id":"https://openalex.org/W7125901270","doi":"https://doi.org/10.1109/smc58881.2025.11342953","title":"Audio-Visual Cross-Attention for Improved Deepfake Video Detection and Forgery Localization","display_name":"Audio-Visual Cross-Attention for Improved Deepfake Video Detection and Forgery Localization","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125901270","doi":"https://doi.org/10.1109/smc58881.2025.11342953"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11342953","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342953","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124066864","display_name":"Oussama Jalleli","orcid":null},"institutions":[{"id":"https://openalex.org/I39481719","display_name":"Institut National de la Recherche Scientifique","ror":"https://ror.org/04td37d32","country_code":"CA","type":"education","lineage":["https://openalex.org/I39481719","https://openalex.org/I49663120"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Oussama Jalleli","raw_affiliation_strings":["INRS-EMT, Universit&#x00E9; du Qu&#x00E9;bec,Montreal,QC,Canada"],"affiliations":[{"raw_affiliation_string":"INRS-EMT, Universit&#x00E9; du Qu&#x00E9;bec,Montreal,QC,Canada","institution_ids":["https://openalex.org/I39481719"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124124940","display_name":"Yi Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I39481719","display_name":"Institut National de la Recherche Scientifique","ror":"https://ror.org/04td37d32","country_code":"CA","type":"education","lineage":["https://openalex.org/I39481719","https://openalex.org/I49663120"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Yi Zhu","raw_affiliation_strings":["INRS-EMT, Universit&#x00E9; du Qu&#x00E9;bec,Montreal,QC,Canada"],"affiliations":[{"raw_affiliation_string":"INRS-EMT, Universit&#x00E9; du Qu&#x00E9;bec,Montreal,QC,Canada","institution_ids":["https://openalex.org/I39481719"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112847522","display_name":"Tiago Henrik Falk","orcid":null},"institutions":[{"id":"https://openalex.org/I39481719","display_name":"Institut National de la Recherche Scientifique","ror":"https://ror.org/04td37d32","country_code":"CA","type":"education","lineage":["https://openalex.org/I39481719","https://openalex.org/I49663120"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Tiago H. Falk","raw_affiliation_strings":["INRS-EMT, Universit&#x00E9; du Qu&#x00E9;bec,Montreal,QC,Canada"],"affiliations":[{"raw_affiliation_string":"INRS-EMT, Universit&#x00E9; du Qu&#x00E9;bec,Montreal,QC,Canada","institution_ids":["https://openalex.org/I39481719"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5124066864"],"corresponding_institution_ids":["https://openalex.org/I39481719"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68463656,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"75","last_page":"79"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9383000135421753,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9383000135421753,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.038600001484155655,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.0044999998062849045,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7504000067710876},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6389999985694885},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.44359999895095825},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.42309999465942383},{"id":"https://openalex.org/keywords/complementarity","display_name":"Complementarity (molecular biology)","score":0.34950000047683716},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3463999927043915}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8246999979019165},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7504000067710876},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6840999722480774},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6389999985694885},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5426999926567078},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.44359999895095825},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.42309999465942383},{"id":"https://openalex.org/C202269582","wikidata":"https://www.wikidata.org/wiki/Q2644277","display_name":"Complementarity (molecular biology)","level":2,"score":0.34950000047683716},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3463999927043915},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.33320000767707825},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.32919999957084656},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.31709998846054077},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27079999446868896},{"id":"https://openalex.org/C2983589003","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text detection","level":3,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11342953","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342953","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.492585688829422,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2982058372","https://openalex.org/W3176641851","https://openalex.org/W4225527248","https://openalex.org/W4312769131","https://openalex.org/W4381198892","https://openalex.org/W4385822395","https://openalex.org/W4386102876","https://openalex.org/W4394625735","https://openalex.org/W4402111996","https://openalex.org/W4402351686","https://openalex.org/W4402774452","https://openalex.org/W4415796872"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,14,36,84,104,124,127],"emergence":[2],"of":[3,16],"multi-modal":[4],"generative":[5],"models,":[6],"synthesized":[7],"videos":[8],"are":[9],"becoming":[10],"increasingly":[11],"realistic,":[12],"making":[13],"detection":[15,24,117],"deepfakes":[17],"extremely":[18],"challenging.":[19],"While":[20],"several":[21],"video":[22,62],"deepfake":[23,116],"models":[25],"have":[26],"shown":[27],"promising":[28],"performance,":[29],"their":[30],"focus":[31],"has":[32],"been":[33],"primarily":[34],"on":[35,110,122],"visual":[37,50,128],"modality.":[38],"To":[39],"overcome":[40],"this":[41,96],"limitation,":[42],"we":[43,67,98],"propose":[44],"a":[45,69],"dualstream":[46],"framework":[47],"that":[48,74],"fuses":[49],"and":[51,63,89,114,129,136],"auditory":[52,130],"information":[53],"via":[54],"cross-attention":[55],"computed":[56],"between":[57,126],"embeddings":[58],"extracted":[59],"from":[60,79],"pre-trained":[61],"audio":[64],"encoders.":[65],"Additionally,":[66],"design":[68],"weakly-supervised":[70],"forgery":[71,77,92],"localization":[72,113],"head":[73],"infers":[75],"frame-level":[76,112],"scores":[78],"coarse":[80],"segment-level":[81],"labels,":[82],"minimizing":[83],"need":[85],"for":[86,91],"fine-grained":[87],"annotations":[88],"allowing":[90],"location":[93],"characterization.":[94],"In":[95],"paper,":[97],"describe":[99],"our":[100],"preliminary":[101],"results":[102],"showing":[103],"proposed":[105],"model":[106,134],"outperforming":[107],"state-of-the-art":[108],"detectors":[109],"both":[111],"sequence-level":[115],"tasks.":[118],"Ongoing":[119],"work":[120],"focuses":[121],"investigating":[123],"complementarity":[125],"modalities":[131],"to":[132],"improve":[133],"robustness":[135],"explainability.":[137]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-29T00:00:00"}
